# Sample Reading Data -- ACS ----
#
# This example only illustrates how to work with the ACS person-level file.
# It does NOT apply the weights attached to the individual person records,
# so heavy modifications are necessary before using it for actual research.
# Ideas from Tom Bales (2021 Winter Class).
#
# Run this in a fresh R session. The script deliberately does not call
# rm(list = ls()), which would wipe unrelated objects from the workspace of
# whoever sources it.

library(readr) # read_csv()
library(dplyr) # select(), mutate(), filter(), case_when(), %>%

acs_pop_il <- read_csv(
  "/var/www/html/jlee141/econdata/acs_data/acs2019_pop_il.csv"
)
str(acs_pop_il)

# Variable selection ----
# Keep only the variables needed for the project. Look up the variables you
# are interested in in PUMS_DATA_Dictionary_2018.pdf (after page 30).
# This example keeps 8 variables and turns the categorical codes into factors
# so that lm() treats them as categories rather than numbers.
acs_pop_il2 <- acs_pop_il %>%
  select(ST, SCHL, AGEP, CIT, FOD1P, WAGP, SEX, OCCP) %>%
  mutate(
    SEX = as.factor(SEX),
    CIT = as.factor(CIT),
    SCHL = as.factor(SCHL)
  )

# Educational attainment filter ----
# Keep only bachelor's degree or higher (SCHL codes 21-24); see
# PUMS_DATA_Dictionary_2018.pdf, page 42.
# NOTE(review): in the PUMS dictionary 21-24 are bachelor's, master's,
# professional and doctorate degrees, and associate's is 20. Add "20" here if
# associate's degrees should be included -- confirm against the dictionary.
acs_pop_il2 <- acs_pop_il2 %>%
  filter(SCHL %in% c("21", "22", "23", "24"))

# Field-of-degree groups and model variables ----
# Degree groups come from FOD1P; check the full list in the dictionary.
# Rows with non-positive (or missing) wages are dropped BEFORE taking the log
# so that log(WAGP) never produces -Inf/NaN.
# "A.OTHERS" is listed first in `levels`, which makes it the reference
# category for the Degree dummies in the regression below.
acs_pop_il2 <- acs_pop_il2 %>%
  filter(WAGP > 0) %>%
  mutate(
    Degree = factor(
      case_when(
        FOD1P %in% c(
          6200, 6201, 6202, 6203, 6204, 6205, 6206,
          6207, 6209, 6210, 6211, 6212, 6299
        ) ~ "BUSINESS",
        FOD1P %in% c(
          2100, 2101, 2102, 2105, 2106, 2107, 3700, 3701, 3702, 4005
        ) ~ "COMP_MATH",
        TRUE ~ "A.OTHERS"
      ),
      levels = c("A.OTHERS", "BUSINESS", "COMP_MATH")
    ),
    # Use the piped AGEP column, not acs_pop_il2$AGEP: the latter reads the
    # outer (pre-pipeline) data frame and breaks once the row count changes.
    OLDER = ifelse(AGEP > 55, 1, 0),
    AGE2 = AGEP^2,
    logWage = log(WAGP)
  )

# Regression ----
# Does the field of degree matter for wage income?
lm1 <- lm(
  logWage ~ SEX + SCHL + AGEP + AGE2 + CIT + Degree,
  data = acs_pop_il2
)
summary(lm1)