1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
|
#!/usr/bin/env Rscript
# Fit and plot log10(mean household income) against mean age using the
# records in zeta.csv whose sex is "F".

# Load the plotting dependency first so a missing package fails before any
# data is read or processed.
library(ggplot2)

# The columns referenced below (sex, zcta, meanage, meaneducation,
# meanhouseholdincome) must be present in zeta.csv.
df <- read.csv("zeta.csv")

# Keep only rows with sex "F" whose mean education, mean household income and
# mean age lie strictly inside the ranges below, then drop the zcta and sex
# columns. subset() treats an NA condition as FALSE, so rows with missing
# values in any filtered column are dropped as well.
df <- subset(
  df,
  sex == "F" &
    8 < meaneducation & meaneducation < 18 &
    10000 < meanhouseholdincome & meanhouseholdincome < 200000 &
    20 < meanage & meanage < 60,
  select = -c(zcta, sex)
)

# Work on a log10 scale for income.
df$log_income <- log10(df$meanhouseholdincome)

# The rename below is positional, so verify the layout first: a CSV with a
# different column order or count would otherwise silently attach the labels
# "age" / "log_income" to the wrong data and the model would be fitted on it.
expected_positions <- c(2L, 3L, 5L, 6L)
expected_names <- c(
  "meanage", "meaneducation", "meanhouseholdincome", "log_income"
)
if (ncol(df) != 6L ||
      !identical(names(df)[expected_positions], expected_names)) {
  stop(
    "Unexpected column layout in zeta.csv: ",
    paste(names(df), collapse = ", "),
    call. = FALSE
  )
}
names(df) <- c("X", "age", "education", "employment", "income", "log_income")

# Scatter plot of log10 income against age. The y axis label states the log
# scale explicitly because the plotted values are log_income, not raw income.
# print() is explicit so the plot is also rendered when the file is source()d,
# where top-level values are not auto-printed.
age_income_plot <- ggplot(df, aes(x = age, y = log_income)) +
  geom_point(alpha = 0.2) +
  labs(x = "age", y = "log10(income)", title = "log_income(age)")
print(age_income_plot)

# Simple linear regression of log10 income on age; show both the fitted
# coefficients and the full summary.
model <- lm(log_income ~ age, data = df)
print(model)
print(summary(model))
|