From 2ade61411a014b3eed24bd2b382687d55233a9b5 Mon Sep 17 00:00:00 2001 From: leshe4ka46 Date: Sat, 15 Nov 2025 16:30:38 +0300 Subject: R(Cluster) --- RCluster/main.r | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100755 RCluster/main.r (limited to 'RCluster/main.r') diff --git a/RCluster/main.r b/RCluster/main.r new file mode 100755 index 0000000..541078f --- /dev/null +++ b/RCluster/main.r @@ -0,0 +1,93 @@ +#!/usr/bin/env Rscript + +load("income_elec_state.rdata") + +head(income_elec_state) + +income_elec_state <- log10(income_elec_state) +income_elec_state <- income_elec_state[income_elec_state$elec > 2.83, ] + +k <- 4 +km <- kmeans(income_elec_state, k, nstart = 1000) + +km_centers <- data.frame(km$centers) +head(km_centers) + +if (!require(ggplot2)) install.packages("ggplot2", repos = "https://cran.r-project.org/") +library(ggplot2) + +ggplot( + data = income_elec_state, + mapping = aes(x = income, y = elec, color = factor(km$cluster)), +) + + labs(x = "income", y = "electricity usage") + + geom_point(shape = 1) + + geom_point( + data = km_centers, + mapping = aes( + x = income, + y = elec, + color = factor(rownames(km_centers)), + label = NULL + ), + shape = 13, + size = 4 + ) + + +wss <- NULL +range <- 1:10 +for (i in range) { + res <- kmeans(income_elec_state, i, nstart = 10) + wss <- c(wss, res$tot.withinss) +} +wss_df <- data.frame(wss) +ggplot(wss_df, aes(x = range, y = wss)) + + geom_path() + + geom_point() + + scale_x_continuous(breaks = range) + +Q1 <- quantile(income_elec_state$elec, 0.25) +Q3 <- quantile(income_elec_state$elec, 0.75) +IQR_value <- Q3 - Q1 + +lower_bound <- Q1 - 1.5 * IQR_value +upper_bound <- Q3 + 1.5 * IQR_value + +outliers <- income_elec_state[income_elec_state$elec < lower_bound | income_elec_state$elec > upper_bound, ] + +cat("Lower bound:", lower_bound, "\n") +cat("Upper bound:", upper_bound, "\n") +cat("Number of outliers:", nrow(outliers), "\n") +head(outliers) + +head(outliers <- income_elec_state[income_elec_state$elec < 2.84, ]) +head(outliers <- income_elec_state[income_elec_state$income > 4.85, ]) + + +if (!require(maps)) install.packages("maps", repos = "https://cran.r-project.org/") +library(maps) + +head(km$cluster) + +map_color <- km$cluster[order(names(km$cluster))] + +head(map_color) + +map("state", fill = TRUE, col = map_color) + + +if (!require(ggdendro)) install.packages("ggdendro", repos = "https://cran.r-project.org/") +library(ggdendro) + +distance <- dist(income_elec_state, method = "euclidean") + +# single - closest pair of points +# complete - farthest pair of points +# average - average distance between all pairs +h_clust <- hclust(distance, method = "complete") +h_clust + +plot(ggdendrogram(h_clust)) + +cutree(h_clust, k = 2) -- cgit v1.2.3