#!/usr/bin/env Rscript

# Exploratory analysis of income by zip code.
#
# Provenance: recovered from a cgit patch page for commit
# 5aaff9711387ce1ea1ec8ee5c5b4ecd9e1ea3dd1 (author leshe4ka46, dated
# Tue, 11 Nov 2025 11:34:38 +0300, subject "upd"). That commit created
# inlab23/main.r (mode 100755, 86 lines) and the binary file
# inlab23/Rplots.pdf (4581632 bytes). The patch envelope and the "+" diff
# markers were stripped and the code was revised during review, so this file
# no longer matches the commit byte-for-byte.
#
# Input:  "zipIncome.txt" in the working directory, "|"-separated, with a
#         header row and two columns (renamed below to zipCode and income).
# Output: summaries printed to stdout and plots drawn on the active graphics
#         device.
# NOTE(review): presumably Rplots.pdf from the same commit is the output of
# R's default non-interactive graphics device -- confirm.
#
# Every result is printed explicitly so the script produces the same output
# under Rscript and under source(), which does not auto-print by default.

# Configuration ----

# Exclusive income limits used to trim extreme values before the per-zip
# plots. NOTE(review): units/currency of income are not visible here.
income_floor <- 7000
income_ceiling <- 200000

# Load data ----

zip_income <- read.table("zipIncome.txt", header = TRUE, sep = "|")
print(head(zip_income))

# The columns are renamed by position, which is only meaningful for a
# two-column table; fail early instead of producing NA column names.
stopifnot(ncol(zip_income) == 2L)
names(zip_income) <- c("zipCode", "income")
print(head(zip_income))

# Overall statistics ----

overall_mean <- mean(zip_income$income)
overall_median <- median(zip_income$income)

print(summary(zip_income))

cat("Mean income:", overall_mean, "\n")
cat("Median income:", overall_median, "\n")

# Outliers (1.5 * IQR rule) ----

first_quartile <- quantile(zip_income$income, 0.25)
third_quartile <- quantile(zip_income$income, 0.75)
iqr_value <- IQR(zip_income$income)

lower_bound <- first_quartile - 1.5 * iqr_value
upper_bound <- third_quartile + 1.5 * iqr_value

is_outlier <- zip_income$income < lower_bound |
  zip_income$income > upper_bound
outliers <- zip_income[is_outlier, ]

cat("Lower bound:", lower_bound, "\n")
cat("Upper bound:", upper_bound, "\n")
cat("Number of outliers:", nrow(outliers), "\n")

print(head(outliers))

# Per-zip-code statistics ----

mean_by_zip <- aggregate(income ~ zipCode, data = zip_income, FUN = mean)
print(mean_by_zip)

median_by_zip <- aggregate(income ~ zipCode, data = zip_income, FUN = median)
print(median_by_zip)

# Raw scatter plot of the unfiltered data ----

plot(zip_income$zipCode, zip_income$income,
  main = "income by zip code",
  xlab = "zip code",
  ylab = "income"
)

# Trim extreme incomes ----

in_range <- zip_income$income > income_floor &
  zip_income$income < income_ceiling
zip_income_filtered <- zip_income[in_range, ]

print(summary(zip_income_filtered))

new_mean <- mean(zip_income_filtered$income)
new_median <- median(zip_income_filtered$income)

cat("Filtered mean income:", new_mean, "\n")
cat("Filtered median income:", new_median, "\n")

# Base-graphics box plots (linear and log10 income) ----

boxplot(income ~ zipCode,
  data = zip_income_filtered,
  main = "income by zip code",
  xlab = "Zip Codes",
  ylab = "Income"
)

boxplot(log10(income) ~ zipCode,
  data = zip_income_filtered,
  main = "income by zip code",
  xlab = "Zip Codes",
  ylab = "log10(Income)"
)

# ggplot2 plots ----

# requireNamespace() only checks availability; library() then attaches the
# package once and stops with an error if the installation did not succeed.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2", repos = "https://cran.r-project.org/")
}
library(ggplot2)

# Jittered points per zip code on a log10 income axis.
scatter_by_zip <- ggplot(
  zip_income_filtered,
  aes(x = factor(zipCode), y = income)
) +
  geom_point(position = "jitter", alpha = 0.2) +
  scale_y_log10() +
  labs(
    x = "Zip Code",
    y = "Income (log10)",
    title = "Scatter plot of income by zip code"
  )
print(scatter_by_zip)

# Same points coloured by zip code, with a translucent box plot on top.
# outlier.shape = NA hides the box plot's own outlier markers, because the
# raw points are already drawn by geom_point().
scatter_with_boxes <- ggplot(
  zip_income_filtered,
  aes(x = factor(zipCode), y = income)
) +
  geom_point(aes(colour = factor(zipCode)), position = "jitter", alpha = 0.2) +
  geom_boxplot(alpha = 0.1, outlier.shape = NA) +
  scale_y_log10() +
  labs(
    x = "Zip Code",
    y = "Income (log10)",
    title = "Scatter plot of income by zip code"
  )
print(scatter_with_boxes)