diff options
| author | leshe4ka46 <alex9102naid1@ya.ru> | 2025-11-15 16:30:38 +0300 |
|---|---|---|
| committer | leshe4ka46 <alex9102naid1@ya.ru> | 2025-11-18 14:05:14 +0300 |
| commit | 2ade61411a014b3eed24bd2b382687d55233a9b5 (patch) | |
| tree | 1b91eb11d38a0a053a7a806fedadb8c8a676738b /R_inlab23/main.r | |
| parent | 5aaff9711387ce1ea1ec8ee5c5b4ecd9e1ea3dd1 (diff) | |
R(Cluster)
Diffstat (limited to 'R_inlab23/main.r')
| -rwxr-xr-x | R_inlab23/main.r | 86 |
1 files changed, 86 insertions, 0 deletions
#!/usr/bin/env Rscript

# Exploratory analysis of household income by zip code.
#
# Reads a pipe-separated table with a header row from "zipIncome.txt" in the
# current working directory, prints summary statistics, flags outliers with
# the 1.5 * IQR rule, and draws base-graphics and ggplot2 charts of income
# per zip code.

# Load data ----

zip_income <- read.table("zipIncome.txt", header = TRUE, sep = "|")

head(zip_income)

# Replace whatever header names the file carries with the names used below.
names(zip_income) <- c("zipCode", "income")

head(zip_income)

# Overall summary ----

overall_mean <- mean(zip_income$income)
overall_median <- median(zip_income$income)

summary(zip_income)

cat("Mean income:", overall_mean, "\n")
cat("Median income:", overall_median, "\n")

# Outliers (1.5 * IQR rule) ----

q1 <- quantile(zip_income$income, 0.25)
q3 <- quantile(zip_income$income, 0.75)
iqr_value <- IQR(zip_income$income)

lower_bound <- q1 - 1.5 * iqr_value
upper_bound <- q3 + 1.5 * iqr_value

is_outlier <- zip_income$income < lower_bound |
  zip_income$income > upper_bound
outliers <- zip_income[is_outlier, ]

cat("Lower bound:", lower_bound, "\n")
cat("Upper bound:", upper_bound, "\n")
cat("Number of outliers:", nrow(outliers), "\n")

head(outliers)

# Per-zip-code aggregates ----

mean_by_zip <- aggregate(income ~ zipCode, data = zip_income, FUN = mean)
print(mean_by_zip)

median_by_zip <- aggregate(income ~ zipCode, data = zip_income, FUN = median)
print(median_by_zip)

# Raw scatter plot ----

# No trailing comma after the last argument: an empty trailing argument
# would be forwarded into plot()'s `...`.
plot(zip_income$zipCode, zip_income$income,
  main = "income by zip code",
  xlab = "zip code",
  ylab = "income"
)

# Filtered data ----

# Keep only rows whose income lies strictly between these two thresholds.
income_floor <- 7000
income_ceiling <- 200000

in_range <- zip_income$income > income_floor &
  zip_income$income < income_ceiling
zip_income_filtered <- zip_income[in_range, ]

summary(zip_income_filtered)

new_mean <- mean(zip_income_filtered$income)
new_median <- median(zip_income_filtered$income)

cat("Filtered mean income:", new_mean, "\n")
cat("Filtered median income:", new_median, "\n")

# Box plots (base graphics) ----

boxplot(income ~ zipCode,
  data = zip_income_filtered,
  main = "income by zip code",
  xlab = "Zip Codes",
  ylab = "Income"
)

boxplot(log10(income) ~ zipCode,
  data = zip_income_filtered,
  main = "income by zip code",
  xlab = "Zip Codes",
  ylab = "log10(Income)"
)

# ggplot2 charts ----

# requireNamespace() only tests whether the package is installed; the
# library() call below is what attaches it (and errors if it is still
# unavailable after the install attempt).
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2", repos = "https://cran.r-project.org/")
}
library(ggplot2)

# Jittered points per zip code on a log10 income axis.
ggplot(zip_income_filtered, aes(x = factor(zipCode), y = income)) +
  geom_point(position = "jitter", alpha = 0.2) +
  scale_y_log10() +
  xlab("Zip Code") +
  ylab("Income (log10)") +
  ggtitle("Scatter plot of income by zip code")

# Same points coloured by zip code, with a translucent box plot overlaid.
# The box plot's own outlier markers are suppressed (outlier.shape = NA)
# because the jittered points already show every observation.
ggplot(zip_income_filtered, aes(x = factor(zipCode), y = income)) +
  geom_point(aes(colour = factor(zipCode)), position = "jitter", alpha = 0.2) +
  geom_boxplot(alpha = 0.1, outlier.shape = NA, outlier.size = -Inf) +
  scale_y_log10() +
  xlab("Zip Code") +
  ylab("Income (log10)") +
  ggtitle("Scatter plot of income by zip code")
