nb

author: leshe4ka46 <alex9102naid1@ya.ru> 2025-12-09 10:10:21 +0300
committer: leshe4ka46 <alex9102naid1@ya.ru> 2025-12-09 10:10:21 +0300
commit: d8df84af00cfe3038fc696417fd97b33eca0146e (patch)
tree: 19363d55a76d231d4bb5f4b747b17b8199a638a0 /R_NB/main.r
parent: 2ade61411a014b3eed24bd2b382687d55233a9b5 (diff)
1 files changed, 77 insertions, 0 deletions
diff --git a/R_NB/main.r b/R_NB/main.r
new file mode 100755
index 0000000..75979ad
--- /dev/null
+++ b/R_NB/main.r
@@ -0,0 +1,77 @@
+#!/usr/bin/env Rscript
+
+if (!require(e1071)) install.packages("e1071", repos = "https://cran.r-project.org/", Ncpus = 16) # nolint
+library(e1071)
+
+if (!require(dplyr)) install.packages("dplyr", repos = "https://cran.r-project.org/", Ncpus = 16) # nolint
+library(dplyr)
+
+# Bayes' theorem: P(y|x) = P(y) * P(x|y) / P(x)
+
+# Strings are read as factors so train and test share the same level sets.
+data <- read.csv("./nbtrain.csv", stringsAsFactors = TRUE)
+# NOTE(review): train and test overlap if the file has fewer than 10010 rows.
+train <- head(data, 9010)
+test <- tail(data, 1000)
+
+# a) Predict income from age, sex and education.
+model <- naiveBayes(income ~ age + sex + educ, data = train)
+print(model) # explicit, so it also prints under source()
+pred_income <- predict(model, newdata = test)
+# Confusion table: rows are predicted classes, columns are actual classes.
+tt <- table(Predicted = pred_income, Actual = test$income)
+print(tt)
+
+# Print and invisibly return misclassification rates for confusion table `tt`
+# (rows = predicted, columns = actual); classes are matched by name.
+misclass <- function(tt) {
+  actual <- colnames(tt)
+  totals <- colSums(tt)
+  # Correct counts per actual class; 0 when a class was never predicted.
+  hits <- vapply(actual, function(cls) {
+    if (cls %in% rownames(tt)) as.numeric(tt[cls, cls]) else 0
+  }, numeric(1))
+  overall_misclass <- (sum(tt) - sum(hits)) / sum(tt)
+  cat("Overall misclassification rate:", round(overall_misclass, 4), "\n")
+
+  rates <- round((totals - hits) / totals, 4)
+  rates[totals == 0] <- NA_real_ # no actual records: rate undefined
+  cat("Misclassification rate per actual class:\n")
+  print(rates)
+  invisible(rates)
+}
+
+misclass(tt)
+
+# Second model: predict sex from age, education and income.
+model_sex <- naiveBayes(sex ~ age + educ + income, data = train)
+print(model_sex) # explicit, so it also prints under source()
+pred_sex <- predict(model_sex, newdata = test, type = "class")
+# Confusion table: rows are predicted classes, columns are actual classes.
+tt <- table(Predicted = pred_sex, Actual = test$sex)
+print(tt)
+misclass(tt)
+
+
+# Fit a sex classifier on a balanced random sample of `train` (n_per_sex rows
+# of each sex) and report the model, confusion table and error rates on `test`.
+test_random <- function(n_per_sex = 3500) {
+  data_female <- subset(train, sex == "F")
+  data_male <- subset(train, sex == "M")
+  # sample_n() is called without a seed, so each call draws a new sample.
+  data_female <- sample_n(data_female, n_per_sex)
+  data_male <- sample_n(data_male, n_per_sex)
+  random_sample <- rbind(data_male, data_female)
+  model_random <- naiveBayes(sex ~ age + income + educ, data = random_sample)
+  print(model_random)
+  pred_random <- predict(model_random, test, type = "class")
+  tt <- table(Predicted = pred_random, Actual = test$sex)
+  print(tt)
+  misclass(tt)
+}
+
+
+test_random() # one experiment on a freshly drawn balanced sample
+# test_random() # uncomment these to repeat the experiment and compare runs
+# test_random()
+# test_random()
author	leshe4ka46 <alex9102naid1@ya.ru>	2025-12-09 10:10:21 +0300
committer	leshe4ka46 <alex9102naid1@ya.ru>	2025-12-09 10:10:21 +0300
commit	d8df84af00cfe3038fc696417fd97b33eca0146e (patch)
tree	19363d55a76d231d4bb5f4b747b17b8199a638a0 /R_NB/main.r
parent	2ade61411a014b3eed24bd2b382687d55233a9b5 (diff)