Question 1
covid = read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv")
PopulationEstimates = read_excel("/users/noblex/github/geog-176A-labs/data/PopulationEstimates.xls", skip = 2)
state.of.interest = "California"
(state_covid = covid %>%
filter(state == state.of.interest) %>%
group_by(county) %>%
mutate(newCases = cases - lag(cases)) %>%
ungroup())
## # A tibble: 10,479 x 7
## date county state fips cases deaths newCases
## <date> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 2020-01-25 Orange California 06059 1 0 NA
## 2 2020-01-26 Los Angeles California 06037 1 0 NA
## 3 2020-01-26 Orange California 06059 1 0 0
## 4 2020-01-27 Los Angeles California 06037 1 0 0
## 5 2020-01-27 Orange California 06059 1 0 0
## 6 2020-01-28 Los Angeles California 06037 1 0 0
## 7 2020-01-28 Orange California 06059 1 0 0
## 8 2020-01-29 Los Angeles California 06037 1 0 0
## 9 2020-01-29 Orange California 06059 1 0 0
## 10 2020-01-30 Los Angeles California 06037 1 0 0
## # … with 10,469 more rows
(county_most_new_cases = state_covid %>%
filter(date == max(date)) %>%
slice_max(newCases, n = 5) %>%
select(county, newCases))
## # A tibble: 5 x 2
## county newCases
## <chr> <dbl>
## 1 Los Angeles 809
## 2 San Diego 265
## 3 Orange 185
## 4 Fresno 159
## 5 San Bernardino 156
(county_most_cumulative_cases = state_covid %>%
filter(date == max(date)) %>%
slice_max(cases, n = 5) %>%
select(county, cases))
## # A tibble: 5 x 2
## county cases
## <chr> <dbl>
## 1 Los Angeles 253985
## 2 Riverside 55073
## 3 Orange 52121
## 4 San Bernardino 50699
## 5 San Diego 42742
kable(county_most_new_cases, caption = "Most New Cases California Counties",
col.names = c("County", "New Cases"),
format.args = list(big.mark = ","))
Most New Cases California Counties
Los Angeles |
809 |
San Diego |
265 |
Orange |
185 |
Fresno |
159 |
San Bernardino |
156 |
kable(county_most_cumulative_cases, caption = "Most Cumulative Cases California Counties",
col.names = c("County", "Total Cases"),
format.args = list(big.mark = ","))
Most Cumulative Cases California Counties
Los Angeles |
253,985 |
Riverside |
55,073 |
Orange |
52,121 |
San Bernardino |
50,699 |
San Diego |
42,742 |
(pop_by_county = PopulationEstimates %>%
filter(State == "CA") %>%
select(pop19 = POP_ESTIMATE_2019, state = State, county = Area_Name, fips = FIPStxt) %>%
group_by(county) %>%
slice_max(pop19, n=1))
## # A tibble: 59 x 4
## # Groups: county [59]
## pop19 state county fips
## <dbl> <chr> <chr> <chr>
## 1 1671329 CA Alameda County 06001
## 2 1129 CA Alpine County 06003
## 3 39752 CA Amador County 06005
## 4 219186 CA Butte County 06007
## 5 45905 CA Calaveras County 06009
## 6 39512223 CA California 06000
## 7 21547 CA Colusa County 06011
## 8 1153526 CA Contra Costa County 06013
## 9 27812 CA Del Norte County 06015
## 10 192843 CA El Dorado County 06017
## # … with 49 more rows
(covid_cum_pop = right_join(pop_by_county, state_covid, by = "fips") %>%
filter(date == max(date)) %>%
mutate(cases_p_cap = (cases/pop19)) %>%
slice_max(cases_p_cap, n = 5) %>%
select(county.y, cases_p_cap))
## # A tibble: 5 x 2
## county.y cases_p_cap
## <chr> <dbl>
## 1 Imperial 0.0622
## 2 Kings 0.0464
## 3 Kern 0.0341
## 4 Tulare 0.0324
## 5 Merced 0.0308
(covid_newcases_pop = right_join(pop_by_county, state_covid, by = "fips") %>%
filter(date == max(date)) %>%
mutate(newcases_p_cap = (newCases/pop19)) %>%
slice_max(newcases_p_cap, n = 5) %>%
select(county.y, newcases_p_cap))
## # A tibble: 5 x 2
## county.y newcases_p_cap
## <chr> <dbl>
## 1 Kings 0.000262
## 2 San Benito 0.000239
## 3 Monterey 0.000203
## 4 Lake 0.000171
## 5 Fresno 0.000159
kable(covid_cum_pop, caption = "Most Cumulative Cases Per Capita California Counties",
col.names = c("County", "Cases Per Capita"),
format.args = list(big.mark = ","))
Most Cumulative Cases Per Capita California Counties
Imperial |
0.0622134 |
Kings |
0.0464038 |
Kern |
0.0341423 |
Tulare |
0.0324199 |
Merced |
0.0307584 |
kable(covid_newcases_pop, caption = "Most Daily New Cases Per Capita California Counties",
col.names = c("County", "Daily New Cases Per Capita"),
format.args = list(big.mark = ","))
Most Daily New Cases Per Capita California Counties
Kings |
0.0002615 |
San Benito |
0.0002388 |
Monterey |
0.0002027 |
Lake |
0.0001708 |
Fresno |
0.0001591 |
covid_pop = right_join(pop_by_county, state_covid, by = "fips")
(newCases_last14days_per100000 = covid_pop %>%
filter(date > max(date) - 14, date < max(date)) %>%
select(county = county.y, newCases, pop19, date) %>%
group_by(county, pop19) %>%
summarise(newCases = sum(newCases, na.rm = TRUE)) %>%
ungroup() %>%
mutate(pop_per_100000 = (pop19/100000), newCases_per100000 = (newCases/pop_per_100000)))
## # A tibble: 58 x 5
## county pop19 newCases pop_per_100000 newCases_per100000
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Alameda 1671329 1797 16.7 108.
## 2 Alpine 1129 0 0.0113 0
## 3 Amador 39752 38 0.398 95.6
## 4 Butte 219186 657 2.19 300.
## 5 Calaveras 45905 62 0.459 135.
## 6 Colusa 21547 48 0.215 223.
## 7 Contra Costa 1153526 1401 11.5 121.
## 8 Del Norte 27812 7 0.278 25.2
## 9 El Dorado 192843 74 1.93 38.4
## 10 Fresno 999101 2096 9.99 210.
## # … with 48 more rows
(total_state_cases = state_covid %>%
filter(date == max(date)) %>%
group_by(county) %>%
summarise(cases = sum(cases, na.rm = TRUE)) %>%
ungroup() %>%
summarise(cases = sum(cases, na.rm = TRUE)) %>%
pull(cases))
## [1] 763389
(total_state_newCases = state_covid %>%
filter(date == max(date)) %>%
group_by(county) %>%
summarise(newCases = sum(newCases, na.rm = TRUE)) %>%
ungroup() %>%
summarise(newCases = sum(newCases, na.rm = TRUE)) %>%
pull(newCases))
## [1] 2808
(healthy_counties = newCases_last14days_per100000 %>%
filter(newCases_per100000 < 100) %>%
pull(county))
## [1] "Alpine" "Amador" "Del Norte" "El Dorado"
## [5] "Humboldt" "Inyo" "Lake" "Lassen"
## [9] "Mariposa" "Mono" "Napa" "Nevada"
## [13] "Placer" "Plumas" "San Francisco" "Shasta"
## [17] "Sierra" "Siskiyou" "Solano" "Tehama"
## [21] "Trinity" "Tuolumne"
There are 7.6338910^{5} total cases in California, 2808 new cases in California, and these are the counties that are deemed safe by the California Department of Public Health based on the number of new cases per 100,000 residents over the past 14 days: Alpine, Amador, Del Norte, El Dorado, Humboldt, Inyo, Lake, Lassen, Mariposa, Mono, Napa, Nevada, Placer, Plumas, San Francisco, Shasta, Sierra, Siskiyou, Solano, Tehama, Trinity, Tuolumne.
Question 2
data2 = covid %>%
filter(state %in% c("California", "New York", "Louisiana", "Florida")) %>%
group_by(state, date) %>%
summarise(cases = sum(cases, na.rn = TRUE)) %>%
ungroup() %>%
group_by(state) %>%
mutate(newCases = cases - lag(cases), roll7 = zoo::rollmean(newCases, 7, fill = NA, allign = "right")) %>%
ungroup() %>%
filter(newCases > 0)
ggplot(data = data2, aes(x = date, y = newCases)) +
geom_col(aes(y = newCases), col = NA, fill = "tomato1") +
geom_line(aes(y = roll7), col = "violetred4", size = 1) +
facet_grid(~state, scales = "free_y")+
theme_update() +
labs(title = paste("New Reported Cases by Day in Select States"), caption = "Lab 2 - Question 2", x = "Date",
y = "New Cases")

newCases_pop = PopulationEstimates %>%
select(pop19 = "POP_ESTIMATE_2019", areaname = "Area_Name") %>%
right_join(data2, by = c("areaname" = "state")) %>%
mutate(cum_cases_percapita = (cases/pop19), newCases_percapita = (newCases/pop19), roll7 = zoo::rollmean(newCases_percapita, 7, fill = NA, allign = "right"))
ggplot(data = newCases_pop, aes(x = date, y = newCases_percapita)) +
geom_col(aes(y = newCases_percapita), col = NA, fill = "turquoise2") +
geom_line(aes(y = roll7), col = "violetred4", size = 1) +
facet_grid(~areaname, scales = "free_y")+
theme_update() +
labs(title = paste("New Reported Cases by Day in Select States Per Capita"), caption = "Lab 2 - Question 2", x = "Date",
y = "New Cases Per Capita")

When scaling the daily new cases to the population of the respected states, we see that although California shows high daily new cases, it has a much smaller daily new cases per capita distribution. On the other hand, we observe Louisiana to have the lowest daily new cases in our comparison, but when estimated based on state population we find it to have a much greater and stark distribution of new cases per capita. New York and Florida roughly follow the same distribution between their daily new cases and daily new cases per capita.