The naive Bayes algorithm is applied here to the HouseVotes84 dataset.
library(e1071)
## Naive Bayes classifier for discrete predictors: we again use the
## Congressional Voting Records of 1984
# Note: refusals to vote have been treated as missing values!
data("HouseVotes84", package = "mlbench")
# Fit the classifier with Class as the response and every other column
# as a predictor.
model <- naiveBayes(Class ~ ., data = HouseVotes84)
# Show the first rows of the data.
head(HouseVotes84)
## Class V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15
## 1 republican n y n y y y n n n y <NA> y y y n
## 2 republican n y n y y y n n n n n y y y n
## 3 democrat <NA> y y <NA> y y n n n n y n y y n
## 4 democrat n y y n <NA> y n n n n y n y n n
## 5 democrat y y y n y y n n n n y <NA> y y y
## 6 democrat n y y n y y n n n n n n y y y
## V16
## 1 y
## 2 <NA>
## 3 n
## 4 y
## 5 y
## 6 y
# Predicted class for the first 20 records (column 1, Class, is dropped)
predict(model, newdata = HouseVotes84[seq_len(20), -1])
## [1] republican republican republican democrat democrat democrat
## [7] republican republican republican democrat republican republican
## [13] democrat democrat republican republican democrat democrat
## [19] republican democrat
## Levels: democrat republican
# Same 20 records, but return the posterior probability of each class
predict(model, newdata = HouseVotes84[seq_len(20), -1], type = "raw")
## democrat republican
## [1,] 1.029209e-07 9.999999e-01
## [2,] 5.820415e-08 9.999999e-01
## [3,] 5.684937e-03 9.943151e-01
## [4,] 9.985798e-01 1.420152e-03
## [5,] 9.666720e-01 3.332802e-02
## [6,] 8.121430e-01 1.878570e-01
## [7,] 1.751512e-04 9.998248e-01
## [8,] 8.300100e-06 9.999917e-01
## [9,] 8.277705e-08 9.999999e-01
## [10,] 1.000000e+00 5.029425e-11
## [11,] 1.122180e-06 9.999989e-01
## [12,] 5.922777e-06 9.999941e-01
## [13,] 1.000000e+00 5.040020e-09
## [14,] 1.000000e+00 6.242221e-10
## [15,] 4.846577e-07 9.999995e-01
## [16,] 1.154563e-07 9.999999e-01
## [17,] 9.999975e-01 2.524185e-06
## [18,] 1.000000e+00 7.477385e-11
## [19,] 6.641180e-08 9.999999e-01
## [20,] 1.000000e+00 7.400151e-13
# Now classify every record; scoring the model on the data it was fitted to
# gives the resubstitution error.
pred <- predict(model, newdata = HouseVotes84[, -1])
# Cross-tabulate predicted against actual class and display the confusion
# matrix; the overall accuracy is computed from this table.
tab <- table(pred, HouseVotes84[["Class"]])
print(tab)
##
## pred democrat republican
## democrat 238 13
## republican 29 155
# Overall accuracy: correct predictions (the diagonal) over all records.
sum(diag(tab)) / sum(tab)
## [1] 0.9034483
## Refit using Laplace smoothing (laplace = 3) and recompute the
## resubstitution accuracy
model <- naiveBayes(Class ~ ., data = HouseVotes84, laplace = 3)
pred <- predict(model, newdata = HouseVotes84[, -1])
tab <- table(pred, HouseVotes84[["Class"]])
# Correct predictions lie on the diagonal of the confusion matrix.
sum(diag(tab)) / sum(tab)
## [1] 0.9034483
Share this post
Twitter
Google+
Facebook
Reddit
LinkedIn
StumbleUpon
Email