Libraries

library(tidyverse)
library(knitr)
library(zoo)
library(readxl)

Question 1

covid = read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv")

PopulationEstimates = read_excel("/users/noblex/github/geog-176A-labs/data/PopulationEstimates.xls", skip = 2)

state.of.interest = "California"

(state_covid = covid %>%
  filter(state == state.of.interest) %>%
  group_by(county) %>%
  mutate(newCases = cases - lag(cases)) %>%
  ungroup())
## # A tibble: 10,479 x 7
##    date       county      state      fips  cases deaths newCases
##    <date>     <chr>       <chr>      <chr> <dbl>  <dbl>    <dbl>
##  1 2020-01-25 Orange      California 06059     1      0       NA
##  2 2020-01-26 Los Angeles California 06037     1      0       NA
##  3 2020-01-26 Orange      California 06059     1      0        0
##  4 2020-01-27 Los Angeles California 06037     1      0        0
##  5 2020-01-27 Orange      California 06059     1      0        0
##  6 2020-01-28 Los Angeles California 06037     1      0        0
##  7 2020-01-28 Orange      California 06059     1      0        0
##  8 2020-01-29 Los Angeles California 06037     1      0        0
##  9 2020-01-29 Orange      California 06059     1      0        0
## 10 2020-01-30 Los Angeles California 06037     1      0        0
## # … with 10,469 more rows
(county_most_new_cases = state_covid %>%
    filter(date == max(date)) %>%
    slice_max(newCases, n = 5) %>%
    select(county, newCases))
## # A tibble: 5 x 2
##   county         newCases
##   <chr>             <dbl>
## 1 Los Angeles         809
## 2 San Diego           265
## 3 Orange              185
## 4 Fresno              159
## 5 San Bernardino      156
(county_most_cumulative_cases = state_covid %>%
    filter(date == max(date)) %>%
    slice_max(cases, n = 5) %>%
    select(county, cases))
## # A tibble: 5 x 2
##   county          cases
##   <chr>           <dbl>
## 1 Los Angeles    253985
## 2 Riverside       55073
## 3 Orange          52121
## 4 San Bernardino  50699
## 5 San Diego       42742
kable(county_most_new_cases, caption = "Most New Cases California Counties",
      col.names = c("County", "New Cases"),
      format.args = list(big.mark = ","))
Most New Cases California Counties
County New Cases
Los Angeles 809
San Diego 265
Orange 185
Fresno 159
San Bernardino 156
kable(county_most_cumulative_cases, caption = "Most Cumulative Cases California Counties",
      col.names = c("County", "Total Cases"),
      format.args = list(big.mark = ","))
Most Cumulative Cases California Counties
County Total Cases
Los Angeles 253,985
Riverside 55,073
Orange 52,121
San Bernardino 50,699
San Diego 42,742
(pop_by_county = PopulationEstimates %>%
    filter(State == "CA") %>%
    select(pop19 = POP_ESTIMATE_2019, state = State, county = Area_Name, fips = FIPStxt) %>%
    group_by(county) %>%
    slice_max(pop19, n=1))
## # A tibble: 59 x 4
## # Groups:   county [59]
##       pop19 state county              fips 
##       <dbl> <chr> <chr>               <chr>
##  1  1671329 CA    Alameda County      06001
##  2     1129 CA    Alpine County       06003
##  3    39752 CA    Amador County       06005
##  4   219186 CA    Butte County        06007
##  5    45905 CA    Calaveras County    06009
##  6 39512223 CA    California          06000
##  7    21547 CA    Colusa County       06011
##  8  1153526 CA    Contra Costa County 06013
##  9    27812 CA    Del Norte County    06015
## 10   192843 CA    El Dorado County    06017
## # … with 49 more rows
(covid_cum_pop = right_join(pop_by_county, state_covid, by = "fips") %>%
    filter(date == max(date)) %>%
    mutate(cases_p_cap = (cases/pop19)) %>%
    slice_max(cases_p_cap, n = 5) %>%
    select(county.y, cases_p_cap))
## # A tibble: 5 x 2
##   county.y cases_p_cap
##   <chr>          <dbl>
## 1 Imperial      0.0622
## 2 Kings         0.0464
## 3 Kern          0.0341
## 4 Tulare        0.0324
## 5 Merced        0.0308
(covid_newcases_pop = right_join(pop_by_county, state_covid, by = "fips") %>%
    filter(date == max(date)) %>%
    mutate(newcases_p_cap = (newCases/pop19)) %>%
    slice_max(newcases_p_cap, n = 5) %>%
    select(county.y, newcases_p_cap))
## # A tibble: 5 x 2
##   county.y   newcases_p_cap
##   <chr>               <dbl>
## 1 Kings            0.000262
## 2 San Benito       0.000239
## 3 Monterey         0.000203
## 4 Lake             0.000171
## 5 Fresno           0.000159
kable(covid_cum_pop, caption = "Most Cumulative Cases Per Capita California Counties",
      col.names = c("County", "Cases Per Capita"),
      format.args = list(big.mark = ","))
Most Cumulative Cases Per Capita California Counties
County Cases Per Capita
Imperial 0.0622134
Kings 0.0464038
Kern 0.0341423
Tulare 0.0324199
Merced 0.0307584
kable(covid_newcases_pop, caption = "Most Daily New Cases Per Capita California Counties",
      col.names = c("County", "Daily New Cases Per Capita"),
      format.args = list(big.mark = ","))
Most Daily New Cases Per Capita California Counties
County Daily New Cases Per Capita
Kings 0.0002615
San Benito 0.0002388
Monterey 0.0002027
Lake 0.0001708
Fresno 0.0001591
covid_pop = right_join(pop_by_county, state_covid, by = "fips")

(newCases_last14days_per100000 = covid_pop %>%
  filter(date > max(date) - 14, date < max(date)) %>%
  select(county = county.y, newCases, pop19, date) %>%
  group_by(county, pop19) %>%
  summarise(newCases = sum(newCases, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(pop_per_100000 = (pop19/100000), newCases_per100000 = (newCases/pop_per_100000)))
## # A tibble: 58 x 5
##    county         pop19 newCases pop_per_100000 newCases_per100000
##    <chr>          <dbl>    <dbl>          <dbl>              <dbl>
##  1 Alameda      1671329     1797        16.7                 108. 
##  2 Alpine          1129        0         0.0113                0  
##  3 Amador         39752       38         0.398                95.6
##  4 Butte         219186      657         2.19                300. 
##  5 Calaveras      45905       62         0.459               135. 
##  6 Colusa         21547       48         0.215               223. 
##  7 Contra Costa 1153526     1401        11.5                 121. 
##  8 Del Norte      27812        7         0.278                25.2
##  9 El Dorado     192843       74         1.93                 38.4
## 10 Fresno        999101     2096         9.99                210. 
## # … with 48 more rows
(total_state_cases = state_covid %>%
  filter(date == max(date)) %>%
  group_by(county) %>%
  summarise(cases = sum(cases, na.rm = TRUE)) %>%
  ungroup() %>%
  summarise(cases = sum(cases, na.rm = TRUE)) %>%
  pull(cases))
## [1] 763389
(total_state_newCases = state_covid %>%
  filter(date == max(date)) %>%
  group_by(county) %>%
  summarise(newCases = sum(newCases, na.rm = TRUE)) %>%
  ungroup() %>%
  summarise(newCases = sum(newCases, na.rm = TRUE)) %>%
  pull(newCases))
## [1] 2808
(healthy_counties = newCases_last14days_per100000 %>%
  filter(newCases_per100000 < 100) %>%
  pull(county))
##  [1] "Alpine"        "Amador"        "Del Norte"     "El Dorado"    
##  [5] "Humboldt"      "Inyo"          "Lake"          "Lassen"       
##  [9] "Mariposa"      "Mono"          "Napa"          "Nevada"       
## [13] "Placer"        "Plumas"        "San Francisco" "Shasta"       
## [17] "Sierra"        "Siskiyou"      "Solano"        "Tehama"       
## [21] "Trinity"       "Tuolumne"

There are 7.6338910^{5} total cases in California, 2808 new cases in California, and these are the counties that are deemed safe by the California Department of Public Health based on the number of new cases per 100,000 residents over the past 14 days: Alpine, Amador, Del Norte, El Dorado, Humboldt, Inyo, Lake, Lassen, Mariposa, Mono, Napa, Nevada, Placer, Plumas, San Francisco, Shasta, Sierra, Siskiyou, Solano, Tehama, Trinity, Tuolumne.


Question 2

data2 = covid %>%
  filter(state %in% c("California", "New York", "Louisiana", "Florida")) %>%
  group_by(state, date) %>%
  summarise(cases = sum(cases, na.rn = TRUE)) %>%
  ungroup() %>%
  group_by(state) %>%
  mutate(newCases = cases - lag(cases), roll7 = zoo::rollmean(newCases, 7, fill = NA, allign = "right")) %>%
  ungroup() %>% 
  filter(newCases > 0)

  ggplot(data = data2, aes(x = date, y = newCases)) +
  geom_col(aes(y = newCases), col = NA, fill = "tomato1") +
  geom_line(aes(y = roll7), col = "violetred4", size = 1) +
  facet_grid(~state, scales = "free_y")+
  theme_update() +
  labs(title = paste("New Reported Cases by Day in Select States"), caption = "Lab 2 - Question 2", x = "Date",
       y = "New Cases")

newCases_pop = PopulationEstimates %>%
  select(pop19 = "POP_ESTIMATE_2019", areaname = "Area_Name") %>%
  right_join(data2, by = c("areaname" = "state")) %>%
  mutate(cum_cases_percapita = (cases/pop19), newCases_percapita = (newCases/pop19), roll7 = zoo::rollmean(newCases_percapita, 7, fill = NA, allign = "right")) 
  

ggplot(data = newCases_pop, aes(x = date, y = newCases_percapita)) +
  geom_col(aes(y = newCases_percapita), col = NA, fill = "turquoise2") +
  geom_line(aes(y = roll7), col = "violetred4", size = 1) +
  facet_grid(~areaname, scales = "free_y")+
  theme_update() +
  labs(title = paste("New Reported Cases by Day in Select States Per Capita"), caption = "Lab 2 - Question 2", x = "Date",
       y = "New Cases Per Capita")


When scaling the daily new cases to the population of the respected states, we see that although California shows high daily new cases, it has a much smaller daily new cases per capita distribution. On the other hand, we observe Louisiana to have the lowest daily new cases in our comparison, but when estimated based on state population we find it to have a much greater and stark distribution of new cases per capita. New York and Florida roughly follow the same distribution between their daily new cases and daily new cases per capita.