#!/usr/bin/env Rscript

# Exploratory walkthrough of the ZIP-code income data in "zipIncome.txt"
# (pipe-delimited, with a header row), plus a few small regression and
# t-test demonstrations on synthetic data.
#
# Side effects: writes "zip_income_clean.txt" to the working directory and
# draws several plots on the active graphics device.

library(lattice)  # provides densityplot()

# ---- Load and inspect -------------------------------------------------------

zip_income <- read.table("zipIncome.txt", header = TRUE, sep = "|")
names(zip_income) <- c("zipCode", "income")

head(zip_income)
tail(zip_income)

# na.rm = TRUE: a single missing income would otherwise turn both statistics
# into NA.
overall_mean <- mean(zip_income$income, na.rm = TRUE)
overall_median <- median(zip_income$income, na.rm = TRUE)
summary(zip_income)
cat("Mean income:", overall_mean, "\n")
cat("Median income:", overall_median, "\n")

dim(zip_income)
names(zip_income)
unique(zip_income$zipCode)

# ---- Missing values ---------------------------------------------------------

# NA count per column, before and after dropping incomplete rows.
colSums(is.na(zip_income))
zip_income_clean <- na.omit(zip_income)
colSums(is.na(zip_income_clean))

# The exported file marks missing values with sentinels (-100 for income,
# 99 for zipCode). The sentinels are applied to a copy so that they never leak
# into the statistics and plots below.
zip_income_filled <- zip_income
zip_income_filled$income[is.na(zip_income_filled$income)] <- -100
zip_income_filled$zipCode[is.na(zip_income_filled$zipCode)] <- 99

write.table(
  zip_income_filled,
  file = "zip_income_clean.txt",  # output filename
  sep = "|",
  row.names = FALSE
)

# ---- Toy linear regression --------------------------------------------------

# y follows x for the first ten points, then jumps by 5 for the last ten.
s1 <- c(seq(1, 10, by = 1), seq(16, 25, by = 1))
df <- data.frame(x = seq(1, 20), y = s1)
print(df)
plot(df)

model <- lm(y ~ x, data = df)
summary(model)

plot(df$x, df$y)
abline(model, col = "red")

# ---- Income distribution ----------------------------------------------------

# Only incomes that are actually present are plotted.
known_income <- zip_income$income[!is.na(zip_income$income)]

plot(density(known_income))
hist(known_income)
densityplot(known_income)

# log() is finite only for strictly positive values, so zero or negative
# incomes are left out of the log-scale density.
positive_income <- known_income[known_income > 0]
densityplot(log(positive_income))

# ---- Two-sample t-test on simulated data ------------------------------------

# Two samples of 10 standard-deviation-1 normal draws with means 0 and 2.
# No seed is set, so the result differs from run to run.
x <- rnorm(10)
y <- rnorm(10, mean = 2)
t.test(x, y)

# ---- Income by ZIP code -----------------------------------------------------

# zip_income is unmodified since it was read, so no second read is needed.
# Treating zipCode as a factor fits one mean income per ZIP code; lm() would
# drop incomplete rows anyway, so the fit uses the NA-free rows directly and
# the fitted values line up with the plotted points.
model <- lm(income ~ factor(zipCode), data = zip_income_clean)
summary(model)

plot(zip_income_clean$zipCode, zip_income_clean$income)
# abline() uses only the first two coefficients of a model, which is not
# meaningful for a factor predictor; draw the fitted per-ZIP values instead.
points(zip_income_clean$zipCode, fitted(model), col = "red", pch = 19)