The goal of library(climateapi) is to minimize repeated data cleaning and wrangling so that project teams can devote more time to substantive analysis and inference-making. The package works toward this goal by creating a unified interface to common datasets and data manipulation tasks. Functions (will) support both climate-specific datasets and those that are climate-adjacent.
Installation
You can install the development version of climateapi from GitHub with:
# Install the development version from GitHub using renv.
# If renv itself is not installed yet, uncomment and run the next line first.
# install.packages("renv")
renv::install("UI-Research/climateapi")
The climateapi package at work:
# Packages used by the examples below
library(climateapi)
library(urbnindicators)
library(sf)
library(tidyverse)
library(urbnthemes)
# Apply plotting defaults using the "print" style
# (presumably an urbnthemes helper that sets ggplot2 defaults -- confirm in its docs)
set_urbn_defaults(style = "print")
ACS Housing and Demographics
Capacity for interacting with data from the American Community Survey is housed in an adjacent package, urbnindicators.
Visit that package’s webpage and documentation to learn more.
Major Disaster Declarations
## County-level FEMA disaster declarations, requested with api = TRUE
county_disaster_declarations <- get_fema_disaster_declarations_county(api = TRUE)

## Sum declarations per year across Alabama counties and chart them as columns
county_disaster_declarations %>%
  ## Alabama: GEOIDs that start with the state FIPS prefix "01"
  dplyr::filter(stringr::str_detect(GEOID, "^01")) %>%
  dplyr::group_by(year_declared) %>%
  dplyr::summarise(annual_incidents = sum(incidents_all, na.rm = TRUE)) %>%
  ggplot2::ggplot() +
  ggplot2::geom_col(
    mapping = ggplot2::aes(x = year_declared, y = annual_incidents)) +
  ## hand-placed label for the 2020 spike
  ggplot2::annotate(
    geom = "text",
    x = 2016.5,
    y = 132,
    label = stringr::str_wrap("COVID-19 pandemic", width = 10),
    fontface = "bold") +
  ggplot2::labs(
    title = "COVID Results in a Spike of Counties with Disaster Declarations in 2020",
    subtitle = "Sum of major disaster declarations per Alabama county, by year",
    x = "",
    y = "") +
  theme_urbn_print()
Wildfire Perimeters and Structures
## Keep only the largest active fire (by incident_size_acres), reproject it to
## EPSG:5070, and repair any invalid geometry
wildfire_perimeters <- get_current_fire_perimeters() %>%
  dplyr::arrange(dplyr::desc(incident_size_acres)) %>%
  dplyr::slice(1) %>%
  sf::st_transform(crs = 5070) %>%
  sf::st_make_valid()

## The result is a two-item list:
##   [[1]] tract-level structure estimates
##   [[2]] the structure points
impacted_structures <- get_structures(
  boundaries = wildfire_perimeters,
  geography = "tract",
  keep_structures = TRUE)
#> | | | 0% | |== | 2% | |==== | 5% | |==== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 10% | |======= | 11% | |======== | 11% | |======== | 12% | |========= | 13% | |========= | 14% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |=============== | 22% | |================ | 23% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |==================== | 29% | |======================= | 32% | |======================== | 34% | |========================= | 35% | |========================= | 36% | |========================== | 37% | |=========================== | 38% | |============================== | 44% | |================================ | 45% | |================================ | 46% | |================================= | 47% | |================================== | 48% | |=================================== | 50% | |==================================== | 51% | |==================================== | 52% | |===================================== | 53% | |======================================= | 55% | |========================================== | 61% | |============================================ | 62% | |============================================ | 63% | |============================================= | 64% | |=============================================== | 67% | |================================================ | 68% | |================================================ | 69% | |================================================= | 70% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |===================================================== | 76% | 
|====================================================== | 77% | |====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 79% | |======================================================== | 80% | |========================================================= | 81% | |========================================================= | 82% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 87% | |============================================================== | 88% | |=============================================================== | 90% | |================================================================ | 91% | |================================================================= | 92% | |================================================================= | 93% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |==================================================================== | 96% | |======================================================================| 100%
#> Reading layer `AZ_Structures' from data source
#> `C:\Users\wcurrangroome\Box\METRO Climate and Communities Practice Area\github-repository\built-environment\housing-units\usa-structures\raw\AZ\Deliverable20230502AZ\AZ_Structures.gdb'
#> using driver `OpenFileGDB'
#> Simple feature collection with 2701791 features and 28 fields
#> Geometry type: MULTIPOLYGON
#> Dimension: XY
#> Bounding box: xmin: -114.8118 ymin: 31.33255 xmax: -109.0454 ymax: 37.00252
#> Geodetic CRS: WGS 84
#> | | | 0% | |== | 3% | |===== | 7% | |====== | 8% | |====== | 9% | |======= | 10% | |======== | 11% | |======== | 12% | |========= | 13% | |================= | 24% | |================== | 26% | |===================== | 30% | |======================= | 33% | |======================== | 34% | |========================= | 35% | |========================== | 37% | |============================ | 41% | |=============================== | 45% | |================================ | 46% | |================================== | 49% | |=================================== | 50% | |===================================== | 53% | |====================================== | 54% | |=========================================== | 61% | |============================================== | 65% | |================================================ | 68% | |================================================ | 69% | |================================================= | 71% | |==================================================== | 75% | |===================================================== | 76% | |======================================================= | 79% | |========================================================= | 81% | |========================================================== | 83% | |=========================================================== | 84% | |============================================================ | 86% | |============================================================== | 88% | |=============================================================== | 90% | |================================================================ | 91% | |================================================================= | 92% | |=================================================================== | 95% | |==================================================================== | 96% | |==================================================================== | 98% | 
|======================================================================| 100%
## Nationwide cartographic-boundary tracts (2023), reprojected to EPSG:5070
us_tracts_sf <- tigris::tracts(cb = TRUE, year = 2023, progress_bar = FALSE) %>%
  sf::st_transform(crs = 5070)

## Tracts that intersect a 100,000-unit buffer around the fire perimeter,
## enriched with:
##   - residential unit totals from the first element of impacted_structures
##   - county / state attributes from tidycensus::fips_codes
tracts_sf <- us_tracts_sf %>%
  sf::st_filter(
    sf::st_buffer(sf::st_transform(wildfire_perimeters, 5070), dist = 100000)) %>%
  dplyr::select(GEOID) %>%
  dplyr::left_join(
    impacted_structures[[1]] %>%
      dplyr::filter(occupancy_class == "Residential") %>%
      dplyr::group_by(GEOID) %>%
      dplyr::summarise(residential_units = sum(count, na.rm = TRUE)),
    by = "GEOID") %>%
  ## the first five GEOID characters are the state + county FIPS code
  dplyr::mutate(county_fips = stringr::str_sub(GEOID, start = 1, end = 5)) %>%
  dplyr::left_join(
    dplyr::mutate(
      tidycensus::fips_codes,
      county_fips = stringr::str_c(state_code, county_code)),
    by = "county_fips")

## One feature per county, with the " County" suffix stripped for map labels
counties_sf <- tracts_sf %>%
  dplyr::group_by(county_fips, county) %>%
  dplyr::summarise() %>%
  dplyr::ungroup() %>%
  dplyr::mutate(county = stringr::str_remove(county, " County"))
## Map of estimated residential units by tract around the fire perimeter.
## Layers, bottom to top: tract fill, county outlines, county labels, and the
## fire perimeter in red. Relies on tracts_sf, counties_sf and
## wildfire_perimeters created earlier in this example.
ggplot2::ggplot() +
  ggplot2::geom_sf(
    data = tracts_sf,
    ggplot2::aes(fill = residential_units),
    linewidth = .6) +
  ## tracts without any estimate are drawn in dark grey
  ggplot2::scale_fill_continuous(na.value = "darkgrey") +
  ggplot2::geom_sf(data = counties_sf, fill = NA, color = "black", linewidth = .75) +
  ggplot2::geom_sf_text(
    data = counties_sf,
    color = "black",
    ggplot2::aes(label = county),
    fontface = "bold",
    size = 3) +
  ggplot2::geom_sf(data = wildfire_perimeters, fill = NA, color = "red", linewidth = 1) +
  ggplot2::labs(
    title = "Estimated Residential Units within Wildfire Boundaries, by Tract",
    subtitle = stringr::str_c(
      "Incident Name: ", wildfire_perimeters$incident_name, " (",
      paste(
        tracts_sf %>%
          ## drop the geometry column first so that distinct() operates on
          ## state_name alone, and pull that column by name rather than
          ## relying on pull()'s default of "last column"
          sf::st_drop_geometry() %>%
          dplyr::filter(!is.na(residential_units)) %>%
          dplyr::distinct(state_name) %>%
          dplyr::pull(state_name),
        collapse = ", "),
      ") \n",
      "Incident Size: ",
      scales::comma(round(wildfire_perimeters$incident_size_acres, 0)),
      " acres", "\n"),
    fill = "Residential units") +
  urbnthemes::theme_urbn_map()
SBA Disaster Loans
## SBA disaster loan records
sba_disaster_declarations <- get_sba_loans()

## Count loan applications by loan type, approval status and fiscal year, and
## plot them as stacked columns
sba_disaster_declarations %>%
  dplyr::mutate(
    fiscal_year = as.numeric(fiscal_year),
    ## 1 when a positive amount was approved, 0 otherwise; NA amounts stay NA
    sba_approved = dplyr::if_else(approved_amount_total > 0, 1, 0)) %>%
  ## some records, especially those from 2020 onwards, have NA values for approved fields
  ## for that reason, we'll only look at years predating 2020
  ## we're also going to exclude FY 2000--there are records for this year, but none
  ## for the following three years, suggesting some... irregularities in the data
  dplyr::filter(
    !is.na(sba_approved),
    fiscal_year > 2000,
    fiscal_year < 2020) %>%
  ## one row per loan type x approval status x fiscal year, ungrouped
  dplyr::count(loan_type, sba_approved, fiscal_year, name = "count") %>%
  dplyr::mutate(
    fill = dplyr::case_when(
      loan_type == "business" & sba_approved == 1 ~ "Business loans approved",
      loan_type == "business" & sba_approved == 0 ~ "Business loans not approved",
      loan_type == "residential" & sba_approved == 1 ~ "Residential loans approved",
      loan_type == "residential" & sba_approved == 0 ~ "Residential loans not approved")) %>%
  ggplot2::ggplot() +
  ggplot2::geom_col(ggplot2::aes(x = fiscal_year, y = count, fill = fill)) +
  ggplot2::labs(
    title = "The Small Business Administration (SBA) Makes Many Residential Loans Post-Disaster",
    x = "Fiscal year",
    y = "Total loan applications") +
  ## cyan shades for business loans, yellow shades for residential loans
  ggplot2::scale_fill_manual(
    values = c(
      "Business loans approved" = as.character(palette_urbn_cyan[5]),
      "Business loans not approved" = as.character(palette_urbn_cyan[3]),
      "Residential loans approved" = as.character(palette_urbn_yellow[5]),
      "Residential loans not approved" = as.character(palette_urbn_yellow[3]))) +
  ggplot2::scale_y_continuous(labels = scales::comma) +
  ggplot2::scale_x_continuous(breaks = seq(2004, 2019, 3)) +
  ## lay the four legend keys out as a 2 x 2 grid, filled row by row
  ggplot2::guides(fill = ggplot2::guide_legend(nrow = 2, byrow = TRUE))
County Business Patterns
## County Business Patterns records
business_patterns <- get_business_patterns()

## Each industry's share of total annual payroll within a county, plotted for
## state "01", county "001" (Autauga County, AL, per the plot title)
business_patterns %>%
  ## employee_size_range_code "001" = all establishment sizes
  dplyr::filter(employee_size_range_code == "001") %>%
  dplyr::group_by(state, county) %>%
  dplyr::mutate(
    ## denominator is the county's own "total" industry row
    industry_share_payroll = annual_payroll / annual_payroll[industry == "total"]) %>%
  dplyr::filter(industry != "total") %>%
  dplyr::ungroup() %>%
  dplyr::filter(state == "01", county == "001") %>%
  dplyr::mutate(
    industry = stringr::str_wrap(
      janitor::make_clean_names(industry, case = "sentence"),
      40)) %>%
  ggplot2::ggplot() +
  ggplot2::geom_col(
    ggplot2::aes(
      x = industry_share_payroll,
      y = stats::reorder(industry, industry_share_payroll))) +
  ggplot2::labs(
    x = "Share of total payroll",
    y = "Industry",
    title = "Autauga County, AL's Industries (NAICS Codes) by Payroll Share")
Government Expenses
## Government finance records
government_finances <- get_government_finances()

## Total expenditures (millions of USD) and number of records per government
## type for state_code "01", county_code "001" (Autauga County, AL, per the
## plot title), drawn as horizontal bars ordered by amount
government_finances %>%
  dplyr::filter(state_code == "01", county_code == "001") %>%
  dplyr::group_by(government_type) %>%
  dplyr::summarize(
    ## amounts are stored in thousands; divide by 1000 for millions
    amount_millions = sum(amount_thousands, na.rm = TRUE) / 1000,
    count = dplyr::n()) %>%
  ggplot2::ggplot(
    ggplot2::aes(
      ## wrap the labels first and reorder second: str_wrap() returns a plain
      ## character vector, so wrapping after reorder() would discard the
      ## factor ordering and the bars would fall back to alphabetical order
      y = stats::reorder(stringr::str_wrap(government_type, 30), amount_millions),
      x = amount_millions)) +
  ggplot2::geom_col() +
  ## negative hjust nudges the "(N = ...)" label to the right of each bar
  ggplot2::geom_text(ggplot2::aes(label = stringr::str_c("(N = ", count, ")")), hjust = -.25) +
  ggplot2::labs(
    x = "Total annual expenditures (millions, USD)",
    y = "",
    title = "Autauga County, AL's Expenditures by Government Unit Class",
    subtitle = "Government unit counts in parentheses") +
  ggplot2::scale_x_continuous(labels = scales::dollar, limits = c(0, 500)) +
  ggplot2::theme(panel.grid.major = ggplot2::element_blank())
LEHD Origin Destination Employment Statistics (LODES)
## Tract-level origin-destination ("od") LODES records for Alabama, 2022
lodes <- get_lodes(
  lodes_type = "od",
  jobs_type = "all",
  states = "AL",
  years = 2022,
  geography = "tract",
  ## for simplicity, considering only workers who live and work in AL
  state_part = "main") %>%
  ## federal jobs are broken out separately in case users need to standardize
  ## all-jobs counts over time, but this doesn't apply here
  dplyr::filter(job_type == "all")
## Alabama tracts (GEOIDs starting with "01"), keeping only GEOID + geometry
al_tracts <- us_tracts_sf %>%
  dplyr::filter(stringr::str_sub(GEOID, 1, 2) == "01") %>%
  dplyr::select(GEOID) %>%
  sf::st_transform(crs = 5070)

## Lookup table of tract centroid coordinates: columns x, y, GEOID
al_centroids <- al_tracts %>%
  sf::st_centroid() %>%
  sf::st_transform(crs = 5070) %>%
  sf::st_coordinates() %>%
  tibble::as_tibble() %>%
  ## re-attach the tract identifiers; rows are still in al_tracts order
  cbind(al_tracts$GEOID) %>%
  dplyr::rename(
    x = X,
    y = Y,
    ## the bound-on identifier is the third column; rename it by position
    GEOID = 3)
## The five most populous Alabama places (ACS 2022, table B01003), with
## geometry, reduced to a cleaned-up NAME column for labelling
major_al_cities <- tidycensus::get_acs(
  geography = "place",
  state = "AL",
  year = 2022,
  variables = c(population = "B01003_001"),
  output = "wide",
  geometry = TRUE) %>%
  dplyr::slice_max(populationE, n = 5) %>%
  dplyr::transmute(
    ## strip the place-type suffix, the comma and the state name, then tidy
    ## up whatever whitespace is left behind
    NAME = NAME %>%
      stringr::str_remove_all("CDP|city|town|,|Alabama") %>%
      stringr::str_squish() %>%
      stringr::str_trim())
#> | | | 0% | |====== | 9% | |==================== | 28% | |================================= | 47% | |=============================================== | 67% | |============================================================ | 86% | |======================================================================| 100%
## Commuting flows between Alabama tracts: one semi-transparent segment per
## home-tract / work-tract pair with more than 20 jobs, drawn over the tract
## map with the five largest places outlined and labelled
lodes %>%
  dplyr::select(dplyr::matches("GEOID"), total_jobs) %>%
  ## segment start (x, y): centroid of the home tract
  dplyr::left_join(al_centroids, by = c("h_GEOID" = "GEOID")) %>%
  ## segment end (xend, yend): centroid of the work tract
  dplyr::left_join(
    al_centroids %>% dplyr::rename(xend = x, yend = y),
    by = c("w_GEOID" = "GEOID")) %>%
  dplyr::filter(total_jobs > 20) %>%
  ggplot2::ggplot() +
  ggplot2::geom_sf(
    data = al_tracts,
    fill = "lightgrey",
    color = "darkgrey",
    linewidth = .5) +
  ggplot2::geom_segment(
    ggplot2::aes(x = x, y = y, xend = xend, yend = yend),
    color = palette_urbn_main[1],
    alpha = .1) +
  ggplot2::geom_sf(data = major_al_cities, fill = NA, color = "black") +
  ggplot2::geom_sf_text(
    data = major_al_cities,
    ggplot2::aes(label = NAME),
    size = 3,
    fontface = "bold",
    color = "black",
    vjust = -2) +
  theme_urbn_map() +
  ggplot2::labs(title = "Employment Commuting Patterns by Tract in Alabama (2022)")