From 2ade61411a014b3eed24bd2b382687d55233a9b5 Mon Sep 17 00:00:00 2001 From: leshe4ka46 Date: Sat, 15 Nov 2025 16:30:38 +0300 Subject: R(Cluster) --- inlab23/main.r | 86 ---------------------------------------------------------- 1 file changed, 86 deletions(-) delete mode 100755 inlab23/main.r (limited to 'inlab23/main.r') diff --git a/inlab23/main.r b/inlab23/main.r deleted file mode 100755 index 9a10ea3..0000000 --- a/inlab23/main.r +++ /dev/null @@ -1,86 +0,0 @@ -#!/usr/bin/env Rscript - -zip_income <- read.table("zipIncome.txt", header = TRUE, sep = "|") - -head(zip_income) - -names(zip_income) <- c("zipCode", "income") - -head(zip_income) - -overall_mean <- mean(zip_income$income) -overall_median <- median(zip_income$income) - -summary(zip_income) - -cat("Mean income:", overall_mean, "\n") -cat("Median income:", overall_median, "\n") - -Q1 <- quantile(zip_income$income, 0.25) -Q3 <- quantile(zip_income$income, 0.75) -IQR_value <- IQR(zip_income$income) - -lower_bound <- Q1 - 1.5 * IQR_value -upper_bound <- Q3 + 1.5 * IQR_value - -outliers <- zip_income[zip_income$income < lower_bound | zip_income$income > upper_bound, ] - -cat("Lower bound:", lower_bound, "\n") -cat("Upper bound:", upper_bound, "\n") -cat("Number of outliers:", nrow(outliers), "\n") - -head(outliers) - -mean_by_zip <- aggregate(income ~ zipCode, data = zip_income, FUN = mean) -print(mean_by_zip) - -median_by_zip <- aggregate(income ~ zipCode, data = zip_income, FUN = median) -print(median_by_zip) - -plot(zip_income$zipCode, zip_income[, "income"], - main = "income by zip code", - xlab = "zip code", - ylab = "income", -) - -zip_income_filtered <- zip_income[zip_income$income > 7000 & zip_income$income < 200000, ] - -summary(zip_income_filtered) - -new_mean <- mean(zip_income_filtered$income) -new_median <- median(zip_income_filtered$income) - -cat("Filtered mean income:", new_mean, "\n") -cat("Filtered median income:", new_median, "\n") - -boxplot(income ~ zipCode, - data = zip_income_filtered, - main = "income by zip code", - xlab = "Zip Codes", - ylab = "Income" -) - -boxplot(log10(income) ~ zipCode, - data = zip_income_filtered, - main = "income by zip code", - xlab = "Zip Codes", - ylab = "log10(Income)" -) - -if (!require(ggplot2)) install.packages("ggplot2", repos = "https://cran.r-project.org/") -library(ggplot2) - -ggplot(zip_income_filtered, aes(x = factor(zipCode), y = income)) + - geom_point(position = "jitter", alpha = 0.2) + - scale_y_log10() + - xlab("Zip Code") + - ylab("Income (log10)") + - ggtitle("Scatter plot of income by zip code") - -ggplot(zip_income_filtered, aes(x = factor(zipCode), y = income)) + - geom_point(aes(colour = factor(zipCode)), position = "jitter", alpha = 0.2) + - geom_boxplot(alpha = 0.1, outlier.shape = NA, outlier.size = -Inf) + - scale_y_log10() + - xlab("Zip Code") + - ylab("Income (log10)") + - ggtitle("Scatter plot of income by zip code") -- cgit v1.2.3