aboutsummaryrefslogtreecommitdiff
path: root/inlab23/main.r
diff options
context:
space:
mode:
authorleshe4ka46 <alex9102naid1@ya.ru>2025-11-15 16:30:38 +0300
committerleshe4ka46 <alex9102naid1@ya.ru>2025-11-18 14:05:14 +0300
commit2ade61411a014b3eed24bd2b382687d55233a9b5 (patch)
tree1b91eb11d38a0a053a7a806fedadb8c8a676738b /inlab23/main.r
parent5aaff9711387ce1ea1ec8ee5c5b4ecd9e1ea3dd1 (diff)
R(Cluster)
Diffstat (limited to 'inlab23/main.r')
-rwxr-xr-xinlab23/main.r86
1 files changed, 0 insertions, 86 deletions
diff --git a/inlab23/main.r b/inlab23/main.r
deleted file mode 100755
index 9a10ea3..0000000
--- a/inlab23/main.r
+++ /dev/null
@@ -1,86 +0,0 @@
-#!/usr/bin/env Rscript
-
-zip_income <- read.table("zipIncome.txt", header = TRUE, sep = "|")
-
-head(zip_income)
-
-names(zip_income) <- c("zipCode", "income")
-
-head(zip_income)
-
-overall_mean <- mean(zip_income$income)
-overall_median <- median(zip_income$income)
-
-summary(zip_income)
-
-cat("Mean income:", overall_mean, "\n")
-cat("Median income:", overall_median, "\n")
-
-Q1 <- quantile(zip_income$income, 0.25)
-Q3 <- quantile(zip_income$income, 0.75)
-IQR_value <- IQR(zip_income$income)
-
-lower_bound <- Q1 - 1.5 * IQR_value
-upper_bound <- Q3 + 1.5 * IQR_value
-
-outliers <- zip_income[zip_income$income < lower_bound | zip_income$income > upper_bound, ]
-
-cat("Lower bound:", lower_bound, "\n")
-cat("Upper bound:", upper_bound, "\n")
-cat("Number of outliers:", nrow(outliers), "\n")
-
-head(outliers)
-
-mean_by_zip <- aggregate(income ~ zipCode, data = zip_income, FUN = mean)
-print(mean_by_zip)
-
-median_by_zip <- aggregate(income ~ zipCode, data = zip_income, FUN = median)
-print(median_by_zip)
-
-plot(zip_income$zipCode, zip_income[, "income"],
- main = "income by zip code",
- xlab = "zip code",
- ylab = "income",
-)
-
-zip_income_filtered <- zip_income[zip_income$income > 7000 & zip_income$income < 200000, ]
-
-summary(zip_income_filtered)
-
-new_mean <- mean(zip_income_filtered$income)
-new_median <- median(zip_income_filtered$income)
-
-cat("Filtered mean income:", new_mean, "\n")
-cat("Filtered median income:", new_median, "\n")
-
-boxplot(income ~ zipCode,
- data = zip_income_filtered,
- main = "income by zip code",
- xlab = "Zip Codes",
- ylab = "Income"
-)
-
-boxplot(log10(income) ~ zipCode,
- data = zip_income_filtered,
- main = "income by zip code",
- xlab = "Zip Codes",
- ylab = "log10(Income)"
-)
-
-if (!require(ggplot2)) install.packages("ggplot2", repos = "https://cran.r-project.org/")
-library(ggplot2)
-
-ggplot(zip_income_filtered, aes(x = factor(zipCode), y = income)) +
- geom_point(position = "jitter", alpha = 0.2) +
- scale_y_log10() +
- xlab("Zip Code") +
- ylab("Income (log10)") +
- ggtitle("Scatter plot of income by zip code")
-
-ggplot(zip_income_filtered, aes(x = factor(zipCode), y = income)) +
- geom_point(aes(colour = factor(zipCode)), position = "jitter", alpha = 0.2) +
- geom_boxplot(alpha = 0.1, outlier.shape = NA, outlier.size = -Inf) +
- scale_y_log10() +
- xlab("Zip Code") +
- ylab("Income (log10)") +
- ggtitle("Scatter plot of income by zip code")