diff --git a/report.Rmd b/report.Rmd index 0dc33e6..b083342 100644 --- a/report.Rmd +++ b/report.Rmd @@ -65,12 +65,25 @@ participant_organizations <- read_csv(participants_path) species_planted <- read_csv(species_path) vendors <- read_csv(vendors_path) +# Clean column names by removing (Required) and (Optional) and trimming whitespace +# Define a function to clean column names +clean_column_names <- function(df) { + colnames(df) <- gsub("\\s*\\(Required\\)|\\s*\\(Optional\\)", "", colnames(df)) + colnames(df) <- str_trim(colnames(df)) + return(df) +} + +# Apply the function to the relevant tibbles +survey_data <- clean_column_names(survey_data) +species_planted <- clean_column_names(species_planted) +participant_organizations <- clean_column_names(participant_organizations) + # Convert relevant date columns to datetime format and recode planting agency responses to standardized labels survey_data <- survey_data %>% mutate(CreationDate = mdy_hms(CreationDate)) %>% - mutate(`Start Date of Planting (Required)` = mdy_hms(`Start Date of Planting (Required)`)) %>% - mutate(`End Date of Planting (Required)` = mdy_hms(`End Date of Planting (Required)`)) %>% - mutate(`Who Planted The Tree(s)? (Required)` = recode(`Who Planted The Tree(s)? (Required)`, + mutate(`Start Date of Planting` = mdy_hms(`Start Date of Planting`)) %>% + mutate(`End Date of Planting` = mdy_hms(`End Date of Planting`)) %>% + mutate(`Who Planted The Tree(s)?` = recode(`Who Planted The Tree(s)?`, "agency" = "State Agency", "community" = "Community Organization", "landowner" = "Private Landowner", @@ -85,7 +98,7 @@ survey_data <- survey_data %>% Submitted_Date_Str = if_else( !is.na(Submitted_Date_Str), - paste0("20", Submitted_Date_Str), # add "20" prefix to "24-11-07" + paste0("20", Submitted_Date_Str), NA_character_ ), @@ -119,30 +132,30 @@ subtitle: "`r format(min(survey_data$CreationDate, na.rm = TRUE), "%B %d, %Y")` ## Key Findings ```{r key-findings-summary} -kf_date_planting_start <- format(min(survey_data$`Start Date of Planting (Required)`, na.rm = TRUE), "%B %d, %Y") -kf_date_planting_end <- format(max(survey_data$`End Date of Planting (Required)`, na.rm = TRUE), "%B %d, %Y") -kf_total_trees <- format(sum(survey_data$`Number of Trees Planted (Required)`), big.mark = ",") +kf_date_planting_start <- format(min(survey_data$`Start Date of Planting`, na.rm = TRUE), "%B %d, %Y") +kf_date_planting_end <- format(max(survey_data$`End Date of Planting`, na.rm = TRUE), "%B %d, %Y") +kf_total_trees <- format(sum(survey_data$`Number of Trees Planted`), big.mark = ",") kf_region_total_trees_ranked <- survey_data %>% group_by(Region) %>% - summarise(Total_Trees = sum(`Number of Trees Planted (Required)`, na.rm = TRUE)) %>% + summarise(Total_Trees = sum(`Number of Trees Planted`, na.rm = TRUE)) %>% arrange(desc(Total_Trees)) kf_participant_total_trees_ranked <- survey_data %>% - group_by(`Who Planted The Tree(s)? (Required)`) %>% - summarise(Total_Trees = sum(`Number of Trees Planted (Required)`, na.rm = TRUE)) %>% + group_by(`Who Planted The Tree(s)?`) %>% + summarise(Total_Trees = sum(`Number of Trees Planted`, na.rm = TRUE)) %>% arrange(desc(Total_Trees)) -kf_dac_total_trees <- sum(survey_data$`Number of Trees Planted (Required)`[!is.na(survey_data$`Disadvantaged Communities Indicator`)], na.rm = TRUE) -kf_dac_percent <- (kf_dac_total_trees / sum(survey_data$`Number of Trees Planted (Required)`, na.rm = TRUE)) * 100 +kf_dac_total_trees <- sum(survey_data$`Number of Trees Planted`[!is.na(survey_data$`Disadvantaged Communities Indicator`)], na.rm = TRUE) +kf_dac_percent <- (kf_dac_total_trees / sum(survey_data$`Number of Trees Planted`, na.rm = TRUE)) * 100 kf_dac_percent_display <- round(kf_dac_percent, 1) kf_generic_tree_type_ranked <- species_planted %>% - filter(!is.na(`Generic Type of Tree (Optional)`)) %>% - count(`Generic Type of Tree (Optional)`, name = "Survey_Count") %>% + filter(!is.na(`Generic Type of Tree`)) %>% + count(`Generic Type of Tree`, name = "Survey_Count") %>% arrange(desc(Survey_Count)) -kf_most_common_generic_tree_type <- kf_generic_tree_type_ranked$`Generic Type of Tree (Optional)`[1] +kf_most_common_generic_tree_type <- kf_generic_tree_type_ranked$`Generic Type of Tree`[1] kf_most_common_generic_tree_type_count <- kf_generic_tree_type_ranked$Survey_Count[1] kf_most_common_generic_tree_type_count_formatted <- format(kf_most_common_generic_tree_type_count, big.mark = ",") @@ -161,9 +174,9 @@ Between **`r kf_date_planting_start` and `r kf_date_planting_end`**, a total of These efforts reflect broad collaboration between **municipal governments**, **community organizations**, **private landowners**, and other stakeholders. - **Most Trees Planted**: The highest number of trees were reported in **`r kf_region_total_trees_ranked$Region[1]`**, followed by **`r kf_region_total_trees_ranked$Region[2]`**. -- **Top Planting Groups**: The most trees, approximately **`r scales::comma(kf_participant_total_trees_ranked$Total_Trees[1])`**, were planted by **`r kf_participant_total_trees_ranked$"Who Planted The Tree(s)? (Required)"[1]`**, followed by **`r kf_participant_total_trees_ranked$"Who Planted The Tree(s)? (Required)"[2]`**, which contributed **`r scales::comma(kf_participant_total_trees_ranked$Total_Trees[2])`** trees. +- **Top Planting Groups**: The most trees, approximately **`r scales::comma(kf_participant_total_trees_ranked$Total_Trees[1])`**, were planted by **`r kf_participant_total_trees_ranked$"Who Planted The Tree(s)?"[1]`**, followed by **`r kf_participant_total_trees_ranked$"Who Planted The Tree(s)?"[2]`**, which contributed **`r scales::comma(kf_participant_total_trees_ranked$Total_Trees[2])`** trees. - **Disadvantaged Communities**: Approximately **`r kf_dac_percent_display`%** of all trees were planted in **Disadvantaged Communities**, as defined by New York State’s Climate Act. -- **Most Reported Tree Genus**: **`r kf_most_common_generic_tree_type`** appeared most frequently, reported in **`r kf_most_common_generic_tree_type_count_formatted`** surveys. +- **Most Reported Tree**: **`r kf_most_common_generic_tree_type`** appeared most frequently, reported in **`r kf_most_common_generic_tree_type_count_formatted`** surveys. - The project received data from **`r kf_total_surveys_formatted` unique surveys**, representing **`r kf_unique_counties_formatted` counties** and **`r kf_unique_municipalities_formatted` municipalities**. These findings help track progress toward equity-centered climate goals, highlight areas of strong participation, and support data-driven planning for future tree planting across the state. @@ -399,8 +412,8 @@ calculate_response_rates <- function(survey_data, fields, caption) { ``` ```{r response-rate-table-optional, echo=TRUE, message=FALSE, fig.height=6, fig.width=8} -fields <- c("Planter Contact Email (Optional)", "Funding Source (Optional)", "Land Ownership (Optional)", - "Tree Size Planted (Optional)", "Source of Trees (Optional)", "Total Number of Species Planted") +fields <- c("Planter Contact Email", "Funding Source", "Land Ownership", + "Tree Size Planted", "Source of Trees", "Total Number of Species Planted") calculate_response_rates(survey_data, fields, "Response Rates for Key Survey Questions") ``` @@ -461,7 +474,7 @@ create_histogram <- function(data, field, x_labels = NULL, color_palette = c("#1 create_histogram( survey_data, - field = "Who Planted The Tree(s)? (Required)", + field = "Who Planted The Tree(s)?", x_labels = c( "agency" = "State Agency", "community" = "Community Organization", @@ -524,8 +537,8 @@ create_bar_chart <- function(data, field, sum_field = NULL, x_labels = NULL, col create_bar_chart( survey_data, - field = "Who Planted The Tree(s)? (Required)", - sum_field = "Number of Trees Planted (Required)", + field = "Who Planted The Tree(s)?", + sum_field = "Number of Trees Planted", x_labels = c( "agency" = "State Agency", "community" = "Community Organization", @@ -586,7 +599,7 @@ This table presents a detailed summary of tree planting activity by participant ```{r participant-type-table, echo=TRUE} survey_data %>% - create_summary_table("Who Planted The Tree(s)? (Required)", "Number of Trees Planted (Required)", remove_na = FALSE, table_font_size = 16) + create_summary_table("Who Planted The Tree(s)?", "Number of Trees Planted", remove_na = FALSE, table_font_size = 16) ``` ## Named User Activity @@ -596,7 +609,7 @@ This table breaks down the number of submissions and trees planted by named user ```{r named-user-activity-table} survey_data %>% mutate(Creator = ifelse(is.na(Creator), "Public User", Creator)) %>% - create_summary_table("Creator", "Number of Trees Planted (Required)", remove_na = FALSE, table_font_size = 16) + create_summary_table("Creator", "Number of Trees Planted", remove_na = FALSE, table_font_size = 16) ``` ## Unique E-mail Activity @@ -605,8 +618,8 @@ This table summarizes the planting activity associated with unique email address ```{r unique-email-activity-table} survey_data %>% - mutate(`Planter Contact Email (Optional)` = ifelse(is.na(`Planter Contact Email (Optional)`), "Not Provided", `Planter Contact Email (Optional)`)) %>% - create_summary_table("Planter Contact Email (Optional)", "Number of Trees Planted (Required)", remove_na = FALSE, table_font_size = 16) + mutate(`Planter Contact Email` = ifelse(is.na(`Planter Contact Email`), "Not Provided", `Planter Contact Email`)) %>% + create_summary_table("Planter Contact Email", "Number of Trees Planted", remove_na = FALSE, table_font_size = 16) ``` ### Municipal Activity @@ -615,13 +628,13 @@ This table presents the number of trees planted by self-reported municipality. I ```{r municipal-activity-table} survey_data %>% - mutate(`Participant Municipality (Optional)` = case_when( - str_starts(`Participant Municipality (Optional)`, "c_") ~ str_replace(`Participant Municipality (Optional)`, "^c_", "") %>% paste0(" (city)"), - str_starts(`Participant Municipality (Optional)`, "v_") ~ str_replace(`Participant Municipality (Optional)`, "^v_", "") %>% paste0(" (village)"), - str_starts(`Participant Municipality (Optional)`, "t_") ~ str_replace(`Participant Municipality (Optional)`, "^t_", "") %>% paste0(" (town)"), - TRUE ~ `Participant Municipality (Optional)` + mutate(`Participant Municipality` = case_when( + str_starts(`Participant Municipality`, "c_") ~ str_replace(`Participant Municipality`, "^c_", "") %>% paste0(" (city)"), + str_starts(`Participant Municipality`, "v_") ~ str_replace(`Participant Municipality`, "^v_", "") %>% paste0(" (village)"), + str_starts(`Participant Municipality`, "t_") ~ str_replace(`Participant Municipality`, "^t_", "") %>% paste0(" (town)"), + TRUE ~ `Participant Municipality` )) %>% - create_summary_table("Participant Municipality (Optional)", "Number of Trees Planted (Required)", remove_na = FALSE, table_font_size = 16) + create_summary_table("Participant Municipality", "Number of Trees Planted", remove_na = FALSE, table_font_size = 16) ``` @@ -632,15 +645,15 @@ This table highlights planting contributions by named organizations, either sele ```{r organization-activity-table} survey_data %>% inner_join(participant_organizations, by = c("GlobalID" = "ParentGlobalID")) %>% - filter(!(is.na(`Participant Organization (Optional)`) & is.na(`Other (Optional)`))) %>% - filter(!(tolower(`Participant Organization (Optional)`) == "other" & is.na(`Other (Optional)`))) %>% - mutate(`Participant Organization (Optional)` = ifelse( - tolower(`Participant Organization (Optional)`) == "other" & !is.na(`Other (Optional)`), - `Other (Optional)`, - `Participant Organization (Optional)` + filter(!(is.na(`Participant Organization`) & is.na(`Other`))) %>% + filter(!(tolower(`Participant Organization`) == "other" & is.na(`Other`))) %>% + mutate(`Participant Organization` = ifelse( + tolower(`Participant Organization`) == "other" & !is.na(`Other`), + `Other`, + `Participant Organization` )) %>% - mutate(`Participant Organization (Optional)` = str_replace_all(`Participant Organization (Optional)`, "_", " ")) %>% - create_summary_table("Participant Organization (Optional)", "Number of Trees Planted (Required)", remove_na = FALSE, table_font_size = 16) + mutate(`Participant Organization` = str_replace_all(`Participant Organization`, "_", " ")) %>% + create_summary_table("Participant Organization", "Number of Trees Planted", remove_na = FALSE, table_font_size = 16) ``` # Location Analysis {.tabset} @@ -700,7 +713,7 @@ Use this map to identify which regions are leading in planting activity, and whe ```{r create-region-choropleth-map, echo=TRUE, message=FALSE, fig.height=6, fig.width=8} survey_data_aggregated <- survey_data %>% group_by(Region) %>% - summarise(total_trees = sum(`Number of Trees Planted (Required)`, na.rm = TRUE)) + summarise(total_trees = sum(`Number of Trees Planted`, na.rm = TRUE)) shapefile_path <- "/home/nick/gitea/tree-tracker-report/data/redc/redc.shp" @@ -732,7 +745,7 @@ plot_geographic_data(joined_data = survey_data_joined, The table below breaks down the total number of trees planted by region. It also shows each region’s percentage contribution to overall planting activity across New York State. ```{r create-summary-table-region, echo=TRUE, message=FALSE, fig.height=6, fig.width=8} -create_summary_table(survey_data, "Region", "Number of Trees Planted (Required)", remove_na = FALSE, table_font_size = 16) +create_summary_table(survey_data, "Region", "Number of Trees Planted", remove_na = FALSE, table_font_size = 16) ``` ## By County @@ -744,7 +757,7 @@ This visual helps uncover local patterns within regions, and may guide localized ```{r create-county-choropleth-map, echo=TRUE, message=FALSE, fig.height=6, fig.width=8} survey_data_aggregated <- survey_data %>% group_by(County) %>% - summarise(total_trees = sum(`Number of Trees Planted (Required)`, na.rm = TRUE)) + summarise(total_trees = sum(`Number of Trees Planted`, na.rm = TRUE)) geographic_data <- counties(state = "NY", cb = TRUE, progress = FALSE) %>% st_as_sf() %>% @@ -769,7 +782,7 @@ plot_geographic_data(joined_data = survey_data_joined, This table provides a detailed breakdown of trees planted by county. Use it alongside the map to validate trends or investigate specific areas. ```{r create-summary-table-county, echo=TRUE, message=FALSE, fig.height=6, fig.width=8} -create_summary_table(survey_data, "County", "Number of Trees Planted (Required)", remove_na = FALSE, table_font_size = 16) +create_summary_table(survey_data, "County", "Number of Trees Planted", remove_na = FALSE, table_font_size = 16) ``` # Tree Analysis {.tabset} @@ -847,7 +860,7 @@ This table summarizes the number and percentage of surveys by **tree genus**. It * **"Not Provided"**: Includes submissions where the genus was not specified. ```{r create-summary-table-genus, echo=TRUE, message=FALSE, fig.height=6, fig.width=8} -create_species_summary_table(species_planted, "Generic Type of Tree (Optional)", "Tree Genus") +create_species_summary_table(species_planted, "Generic Type of Tree", "Tree Genus") ``` --- @@ -861,7 +874,7 @@ This table provides a breakdown of survey submissions by **tree species**. It of * **"Not Provided"**: Surveys that omitted species details. ```{r create-summary-table-species, echo=TRUE, message=FALSE, fig.height=6, fig.width=8} -create_species_summary_table(species_planted, "Tree Species (Optional)", "Tree Species") +create_species_summary_table(species_planted, "Tree Species", "Tree Species") ``` # Disadvantaged Communities {.tabset} @@ -883,7 +896,7 @@ This table presents the total number of trees planted within DACs, grouped by Ne ```{r create-summary-table-region-dac, echo=TRUE, message=FALSE, fig.height=6, fig.width=8} survey_data %>% filter(!is.na(`Disadvantaged Communities Indicator`)) %>% - create_summary_table("Region", "Number of Trees Planted (Required)", remove_na = FALSE, table_font_size = 16) + create_summary_table("Region", "Number of Trees Planted", remove_na = FALSE, table_font_size = 16) ``` --- @@ -895,7 +908,7 @@ This table summarizes tree planting within DACs by **county**. It provides a mor ```{r create-summary-table-county-dac, echo=TRUE, message=FALSE, fig.height=6, fig.width=8} survey_data %>% filter(!is.na(`Disadvantaged Communities Indicator`)) %>% - create_summary_table("County", "Number of Trees Planted (Required)", remove_na = FALSE, table_font_size = 16) + create_summary_table("County", "Number of Trees Planted", remove_na = FALSE, table_font_size = 16) ``` --- @@ -907,5 +920,5 @@ This table breaks down the number of trees planted within DACs by **municipality ```{r create-summary-table-county-municipality, echo=TRUE, message=FALSE, fig.height=6, fig.width=8} survey_data %>% filter(!is.na(`Disadvantaged Communities Indicator`)) %>% - create_summary_table("Municipality", "Number of Trees Planted (Required)", remove_na = FALSE, table_font_size = 16) + create_summary_table("Municipality", "Number of Trees Planted", remove_na = FALSE, table_font_size = 16) ```