Preâmbulo

library(titanic)
## Warning: package 'titanic' was built under R version 3.4.4
library(neuralnet)
## Warning: package 'neuralnet' was built under R version 3.4.4

Importação e Tratamento dos Dados

# Work on a copy of the training set provided by the `titanic` package.
titanic <- titanic_train

# Keep only the response (Survived) and the candidate predictor columns.
titanic <- titanic[c("Survived", "Pclass", "Sex", "Age", "Embarked", "Fare", "Parch", "SibSp")]

# Count missing values per column. vapply() pins the result to exactly one
# integer per column instead of relying on sapply()'s type guessing.
vapply(titanic, function(x) sum(is.na(x)), integer(1))
## Survived   Pclass      Sex      Age Embarked     Fare    Parch    SibSp 
##        0        0        0      177        0        0        0        0

# Age is the only column reporting NAs: impute them with the mean observed age.
titanic$Age[is.na(titanic$Age)] <- mean(titanic$Age, na.rm = TRUE)

# Encode the categorical columns as factors.
# NOTE(review): the summary below shows two blank Embarked values; they are
# empty strings rather than NA, so the NA count above does not include them.
titanic$Embarked <- as.factor(titanic$Embarked)
titanic$Sex <- as.factor(titanic$Sex)
summary(titanic)
##     Survived          Pclass          Sex           Age        Embarked
##  Min.   :0.0000   Min.   :1.000   female:314   Min.   : 0.42    :  2   
##  1st Qu.:0.0000   1st Qu.:2.000   male  :577   1st Qu.:22.00   C:168   
##  Median :0.0000   Median :3.000                Median :29.70   Q: 77   
##  Mean   :0.3838   Mean   :2.309                Mean   :29.70   S:644   
##  3rd Qu.:1.0000   3rd Qu.:3.000                3rd Qu.:35.00           
##  Max.   :1.0000   Max.   :3.000                Max.   :80.00           
##       Fare            Parch            SibSp      
##  Min.   :  0.00   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:  7.91   1st Qu.:0.0000   1st Qu.:0.000  
##  Median : 14.45   Median :0.0000   Median :0.000  
##  Mean   : 32.20   Mean   :0.3816   Mean   :0.523  
##  3rd Qu.: 31.00   3rd Qu.:0.0000   3rd Qu.:1.000  
##  Max.   :512.33   Max.   :6.0000   Max.   :8.000

Separação em Dados de Treino e de Teste

# Sequential hold-out split: the first `n.train` rows train the networks and
# every remaining row is kept for evaluation (rows 711-891 for the 891-row
# data set). The test rows are derived from nrow() so the split stays valid if
# the number of rows changes.
n.train <- 710
stopifnot(nrow(titanic) > n.train)
train.rows <- seq_len(n.train)
test.rows <- setdiff(seq_len(nrow(titanic)), train.rows)

data.train <- titanic[train.rows, ]
data.test  <- titanic[test.rows, ]

# Build numeric design matrices from one shared formula so the train and test
# matrices always have the same columns. model.matrix() expands the factor Sex
# into the dummy column `Sexmale`.
design.formula <- ~ Survived + Pclass + Sex + Age + Fare + SibSp
train.m <- model.matrix(design.formula, data = data.train)
test.m  <- model.matrix(design.formula, data = data.test)

Treino das Redes Neuronais com Função de Output Logística

# Seed the RNG once so the fitted networks are reproducible.
# NOTE(review): all fits share this single seed, so the order of the calls
# below presumably affects each result - keep the order unchanged.
set.seed(0)

# Fit a classification network on the training design matrix.
#   hidden: number of neurons per hidden layer, e.g. 3 or c(3, 1).
# Every model shares the same formula, threshold and logistic activation;
# linear.output = FALSE applies the activation to the output neuron as well.
fit.net <- function(hidden) {
  neuralnet(Survived ~ Pclass + Sexmale + Age + Fare + SibSp,
            data = train.m, hidden = hidden, threshold = 0.01,
            linear.output = FALSE, act.fct = "logistic")
}

# Single hidden layer with 1 to 4 neurons.
nn1 <- fit.net(1)
nn2 <- fit.net(2)
nn3 <- fit.net(3)
nn4 <- fit.net(4)
# Two hidden layers: 2 or 3 neurons followed by a single neuron.
nn21 <- fit.net(c(2, 1))
nn31 <- fit.net(c(3, 1))

Construção das Matrizes de Confusão e Cálculo da Proporção de Observações Bem Classificadas

# Columns of the test design matrix that are fed to the networks.
predictors <- c("Pclass", "Sexmale", "Age", "Fare", "SibSp")
# Both classes are listed explicitly so every confusion matrix is 2 x 2.
classes <- c(0, 1)

# Confusion matrix of a fitted network on the test set.
#   model: a network returned by neuralnet().
# The network output is rounded to the nearest class (0 or 1). Predicted and
# real labels are converted to factors with fixed levels: a plain table() drops
# a class that is never predicted, which yields a 1 x 2 table on which diag()
# no longer picks the correctly classified counts.
confusion.matrix <- function(model) {
  output <- neuralnet::compute(model, test.m[, predictors])$net.result
  table(factor(round(as.vector(output)), levels = classes),
        factor(data.test$Survived, levels = classes),
        dnn = c("Predicted", "Real"))
}

# Proportion of correctly classified observations (diagonal over total).
prop.correct <- function(cm) {
  sum(diag(cm)) / sum(cm)
}

cm1 <- confusion.matrix(nn1)
cm1
##          Real
## Predicted   0   1
##         0 116  65
##         1   0   0
prop.correct(cm1)
## [1] 0.6408839779
cm2 <- confusion.matrix(nn2)
cm2
##          Real
## Predicted   0   1
##         0 105  14
##         1  11  51
prop.correct(cm2)
## [1] 0.861878453
cm3 <- confusion.matrix(nn3)
cm3
##          Real
## Predicted   0   1
##         0 107  16
##         1   9  49
prop.correct(cm3)
## [1] 0.861878453
cm4 <- confusion.matrix(nn4)
cm4
##          Real
## Predicted   0   1
##         0 109  15
##         1   7  50
prop.correct(cm4)
## [1] 0.8784530387
# False negative rate of nn4: real survivors predicted as 0, over all real survivors.
cm4[[1,2]]/(cm4[[1,2]]+cm4[[2,2]])
## [1] 0.2307692308
# False positive rate of nn4: real non-survivors predicted as 1, over all real non-survivors.
cm4[[2,1]]/(cm4[[1,1]]+cm4[[2,1]])
## [1] 0.06034482759
cm21 <- confusion.matrix(nn21)
cm21
##          Real
## Predicted   0   1
##         0 105  13
##         1  11  52
prop.correct(cm21)
## [1] 0.8674033149
cm31 <- confusion.matrix(nn31)
cm31
##          Real
## Predicted   0   1
##         0 105  18
##         1  11  47
prop.correct(cm31)
## [1] 0.8397790055

Desenho da Rede Neuronal Escolhida

# Draw the network with 4 hidden neurons.
# NOTE(review): rep = "best" should select the repetition with the smallest
# error - confirm against the neuralnet plot documentation.
plot(x = nn4, rep = "best")