diff options
| author | leshe4ka46 <alex9102naid1@ya.ru> | 2025-11-15 16:30:38 +0300 |
|---|---|---|
| committer | leshe4ka46 <alex9102naid1@ya.ru> | 2025-11-18 14:05:14 +0300 |
| commit | 2ade61411a014b3eed24bd2b382687d55233a9b5 (patch) | |
| tree | 1b91eb11d38a0a053a7a806fedadb8c8a676738b /RCluster | |
| parent | 5aaff9711387ce1ea1ec8ee5c5b4ecd9e1ea3dd1 (diff) | |
R(Cluster)
Diffstat (limited to 'RCluster')
| -rw-r--r-- | RCluster/Rplots.pdf | bin | 0 -> 5625 bytes | |||
| -rwxr-xr-x | RCluster/main.r | 93 | ||||
| -rwxr-xr-x | RCluster/main2.r | 21 |
3 files changed, 114 insertions, 0 deletions
diff --git a/RCluster/Rplots.pdf b/RCluster/Rplots.pdf Binary files differnew file mode 100644 index 0000000..ef25128 --- /dev/null +++ b/RCluster/Rplots.pdf diff --git a/RCluster/main.r b/RCluster/main.r new file mode 100755 index 0000000..541078f --- /dev/null +++ b/RCluster/main.r @@ -0,0 +1,93 @@ +#!/usr/bin/env Rscript + +load("income_elec_state.rdata") + +head(income_elec_state) + +income_elec_state <- log10(income_elec_state) +income_elec_state <- income_elec_state[income_elec_state$elec > 2.83, ] + +k <- 4 +km <- kmeans(income_elec_state, k, nstart = 1000) + +km_centers <- data.frame(km$centers) +head(km_centers) + +if (!require(ggplot2)) install.packages("ggplot2", repos = "https://cran.r-project.org/") +library(ggplot2) + +ggplot( + data = income_elec_state, + mapping = aes(x = income, y = elec, color = factor(km$cluster)), +) + + labs(x = "income", y = "electricity usage") + + geom_point(shape = 1) + + geom_point( + data = km_centers, + mapping = aes( + x = income, + y = elec, + color = factor(rownames(km_centers)), + label = NULL + ), + shape = 13, + size = 4 + ) + + +wss <- NULL +range <- 1:10 +for (i in range) { + res <- kmeans(income_elec_state, i, nstart = 10) + wss <- c(wss, res$tot.withinss) +} +wss_df <- data.frame(wss) +ggplot(wss_df, aes(x = range, y = wss)) + + geom_path() + + geom_point() + + scale_x_continuous(breaks = range) + +Q1 <- quantile(income_elec_state$elec, 0.25) +Q3 <- quantile(income_elec_state$elec, 0.75) +IQR_value <- Q3 - Q1 + +lower_bound <- Q1 - 1.5 * IQR_value +upper_bound <- Q3 + 1.5 * IQR_value + +outliers <- income_elec_state[income_elec_state$elec < lower_bound | income_elec_state$elec > upper_bound, ] + +cat("Lower bound:", lower_bound, "\n") +cat("Upper bound:", upper_bound, "\n") +cat("Number of outliers:", nrow(outliers), "\n") +head(outliers) + +head(outliers <- income_elec_state[income_elec_state$elec < 2.84, ]) +head(outliers <- income_elec_state[income_elec_state$income > 4.85, ]) + + +if (!require(maps)) install.packages("maps", repos = "https://cran.r-project.org/") +library(maps) + +head(km$cluster) + +map_color <- km$cluster[order(names(km$cluster))] + +head(map_color) + +map("state", fill = TRUE, col = map_color) + + +if (!require(ggdendro)) install.packages("ggdendro", repos = "https://cran.r-project.org/") +library(ggdendro) + +distance <- dist(income_elec_state, method = "euclidean") + +# single - closest pair of points +# complete - farthest pair of points +# average - average distance between all pairs +h_clust <- hclust(distance, method = "complete") +h_clust + +plot(ggdendrogram(h_clust)) + +cutree(h_clust, k = 2) diff --git a/RCluster/main2.r b/RCluster/main2.r new file mode 100755 index 0000000..82bcf7e --- /dev/null +++ b/RCluster/main2.r @@ -0,0 +1,21 @@ +#!/usr/bin/env Rscript + + +income_elec_state = data.frame(a = c(1, 3:6, 8, 10, 12, 13), b = c(1:5, seq(6, 12, by = 2))) + +plot(income_elec_state) + +if (!require(ggdendro)) install.packages("ggdendro", repos = "https://cran.r-project.org/") +library(ggdendro) + +distance <- dist(income_elec_state, method = "euclidean") + +# single - closest pair of points +# complete - farthest pair of points +# average - average distance between all pairs +h_clust <- hclust(distance, method = "singlet") +h_clust + +plot(ggdendrogram(h_clust)) + +cutree(h_clust, k = 2)
\ No newline at end of file |
