Bhawna G. Panwar

2 minute read

This post applies the naive Bayes classifier from the e1071 package to the HouseVotes84 dataset (the 1984 Congressional Voting Records, shipped with the mlbench package).

library (e1071)

## Naive Bayes classifier for discrete predictors: once again we use the
## Congressional Voting Records of 1984.
# Note: refusals to vote have been treated as missing values (NA).

# Load the dataset from mlbench without attaching the whole package.
data("HouseVotes84", package = "mlbench")
# Fit Class (party) against every other column of the data frame.
model <- naiveBayes(Class ~ ., data = HouseVotes84)
# Preview the first six rows.
head(HouseVotes84, n = 6L)
##        Class   V1 V2 V3   V4   V5 V6 V7 V8 V9 V10  V11  V12 V13 V14 V15
## 1 republican    n  y  n    y    y  y  n  n  n   y <NA>    y   y   y   n
## 2 republican    n  y  n    y    y  y  n  n  n   n    n    y   y   y   n
## 3   democrat <NA>  y  y <NA>    y  y  n  n  n   n    y    n   y   y   n
## 4   democrat    n  y  y    n <NA>  y  n  n  n   n    y    n   y   n   n
## 5   democrat    y  y  y    n    y  y  n  n  n   n    y <NA>   y   y   y
## 6   democrat    n  y  y    n    y  y  n  n  n   n    n    n   y   y   y
##    V16
## 1    y
## 2 <NA>
## 3    n
## 4    y
## 5    y
## 6    y
# Predicted class labels for the first 20 records; column 1 (Class) is dropped
# so only the vote columns are passed as new data.
predict(model, newdata = head(HouseVotes84[, -1], n = 20L), type = "class")
##  [1] republican republican republican democrat   democrat   democrat  
##  [7] republican republican republican democrat   republican republican
## [13] democrat   democrat   republican republican democrat   democrat  
## [19] republican democrat  
## Levels: democrat republican
# Same 20 records, but return the posterior probability of each class
# instead of the predicted label.
predict(model, newdata = head(HouseVotes84[, -1], n = 20L), type = "raw")
##           democrat   republican
##  [1,] 1.029209e-07 9.999999e-01
##  [2,] 5.820415e-08 9.999999e-01
##  [3,] 5.684937e-03 9.943151e-01
##  [4,] 9.985798e-01 1.420152e-03
##  [5,] 9.666720e-01 3.332802e-02
##  [6,] 8.121430e-01 1.878570e-01
##  [7,] 1.751512e-04 9.998248e-01
##  [8,] 8.300100e-06 9.999917e-01
##  [9,] 8.277705e-08 9.999999e-01
## [10,] 1.000000e+00 5.029425e-11
## [11,] 1.122180e-06 9.999989e-01
## [12,] 5.922777e-06 9.999941e-01
## [13,] 1.000000e+00 5.040020e-09
## [14,] 1.000000e+00 6.242221e-10
## [15,] 4.846577e-07 9.999995e-01
## [16,] 1.154563e-07 9.999999e-01
## [17,] 9.999975e-01 2.524185e-06
## [18,] 1.000000e+00 7.477385e-11
## [19,] 6.641180e-08 9.999999e-01
## [20,] 1.000000e+00 7.400151e-13
# Now predict every record. These are the same rows the model was fitted on,
# so what follows is a resubstitution (training-set) evaluation.
pred <- predict(model, newdata = HouseVotes84[, -1], type = "class")

# Build and display the confusion matrix: predicted class in rows,
# actual class in columns.
tab <- table(pred, HouseVotes84[["Class"]])
tab
##             
## pred         democrat republican
##   democrat        238         13
##   republican       29        155
# Overall accuracy: correctly classified records (the diagonal of the
# confusion matrix) divided by the total number of records.
sum(diag(tab)) / sum(tab)
## [1] 0.9034483
## Refit using Laplace smoothing (laplace = 3), then recompute the
## confusion matrix and the overall accuracy on the same data.
model <- naiveBayes(Class ~ ., data = HouseVotes84, laplace = 3)
pred <- predict(model, newdata = HouseVotes84[, -1], type = "class")
tab <- table(pred, HouseVotes84[["Class"]])
sum(diag(tab)) / sum(tab)
## [1] 0.9034483
comments powered by Disqus