aboutsummaryrefslogtreecommitdiff
path: root/inlab23/main.r
diff options
context:
space:
mode:
Diffstat (limited to 'inlab23/main.r')
-rwxr-xr-x inlab23/main.r 86
1 files changed, 86 insertions, 0 deletions
diff --git a/inlab23/main.r b/inlab23/main.r
new file mode 100755
index 0000000..9a10ea3
--- /dev/null
+++ b/inlab23/main.r
@@ -0,0 +1,86 @@
+#!/usr/bin/env Rscript
+
+# Load the pipe-delimited table; its first row is treated as a header.
+zip_income <- read.table("zipIncome.txt", sep = "|", header = TRUE)
+head(zip_income)
+
+# Swap the header names from the file for the names the rest of the script uses.
+colnames(zip_income) <- c("zipCode", "income")
+head(zip_income)
+
+# Centre of the raw income column, before any filtering.
+overall_mean <- mean(zip_income[["income"]])
+overall_median <- median(zip_income[["income"]])
+summary(zip_income)
+# Report both statistics, one per line.
+cat("Mean income:", overall_mean, "\n")
+cat("Median income:", overall_median, "\n")
+
+# Tukey fences: flag incomes more than 1.5 IQRs outside the quartiles.
+quartiles <- quantile(zip_income$income, probs = c(0.25, 0.75))
+Q1 <- quartiles[1]
+Q3 <- quartiles[2]
+IQR_value <- IQR(zip_income$income)
+lower_bound <- Q1 - 1.5 * IQR_value
+upper_bound <- Q3 + 1.5 * IQR_value
+# Keep whole rows so each flagged income stays paired with its zip code.
+is_outlier <- zip_income$income < lower_bound | zip_income$income > upper_bound
+outliers <- zip_income[is_outlier, ]
+cat("Lower bound:", lower_bound, "\n")
+cat("Upper bound:", upper_bound, "\n")
+cat("Number of outliers:", nrow(outliers), "\n")
+head(outliers)
+
+# Per-zip-code mean and median income, computed first and then printed in that order.
+mean_by_zip <- aggregate(income ~ zipCode, zip_income, mean)
+median_by_zip <- aggregate(income ~ zipCode, zip_income, median)
+print(mean_by_zip)
+print(median_by_zip)
+
+# Base-graphics scatter of every income value against its zip code.
+plot(zip_income$zipCode, zip_income$income,
+ main = "income by zip code",
+ xlab = "zip code", ylab = "income"
+) # no trailing comma above: an empty argument would be forwarded through `...` and rejected
+
+# Keep only rows whose income lies strictly between 7000 and 200000.
+in_range <- zip_income$income > 7000 & zip_income$income < 200000
+zip_income_filtered <- zip_income[in_range, ]
+summary(zip_income_filtered)
+# Recompute the headline statistics on the filtered rows.
+new_mean <- mean(zip_income_filtered[["income"]])
+new_median <- median(zip_income_filtered[["income"]])
+cat("Filtered mean income:", new_mean, "\n")
+cat("Filtered median income:", new_median, "\n")
+
+# Distribution of filtered income within each zip code, on the raw scale.
+boxplot(income ~ zipCode,
+ data = zip_income_filtered,
+ main = "income by zip code",
+ xlab = "Zip Codes", ylab = "Income"
+)
+
+# Same grouping with income transformed by log10 before plotting.
+boxplot(log10(income) ~ zipCode,
+ data = zip_income_filtered,
+ main = "income by zip code",
+ xlab = "Zip Codes", ylab = "log10(Income)"
+)
+
+# Install ggplot2 only when it is missing; requireNamespace() checks availability without attaching.
+if (!requireNamespace("ggplot2", quietly = TRUE)) install.packages("ggplot2", repos = "https://cran.r-project.org/")
+library(ggplot2)
+# Jittered, semi-transparent points per zip code on a log10 income axis.
+ggplot(zip_income_filtered, aes(factor(zipCode), income)) +
+ geom_jitter(alpha = 0.2) +
+ scale_y_log10() +
+ labs(x = "Zip Code", y = "Income (log10)") +
+ ggtitle("Scatter plot of income by zip code")
+
+# Same jittered points coloured by zip code, with a faint box plot drawn on top (its outlier markers hidden).
+ggplot(zip_income_filtered, aes(factor(zipCode), income)) +
+ geom_jitter(aes(colour = factor(zipCode)), alpha = 0.2) +
+ geom_boxplot(alpha = 0.1, outlier.shape = NA, outlier.size = -Inf) +
+ scale_y_log10() +
+ labs(x = "Zip Code", y = "Income (log10)") +
+ ggtitle("Scatter plot of income by zip code")