diff --git a/report.Rmd b/report.Rmd
index 7b7daa4..55dff1d 100644
--- a/report.Rmd
+++ b/report.Rmd
@@ -23,50 +23,60 @@ geometry: margin=1in
 ---
 
 ```{r setup, include=FALSE}
+# Global setup
 knitr::opts_chunk$set(
-  echo = TRUE,
-  message = FALSE,
-  warning = FALSE
+    echo = TRUE,
+    message = FALSE,
+    warning = FALSE
 )
-# Load necessary libraries
-#library(dplyr)
-#library(ggplot2)
-library(knitr)
-library(lubridate)
-library(RColorBrewer)
-library(scales)
-library(sf)
+
+set.seed(123) # For reproducibility
+
+# Load tidyverse and additional necessary libraries
 library(tidyverse)
+library(lubridate) # mdy_hms() below; only attached by tidyverse >= 2.0.0
+library(sf)
 library(tigris)
+library(scales)
+library(RColorBrewer)
 library(viridis)
 
-# Load survey data files from CSV as tibbles.
-survey_data <- read_csv("data/_25_Million_Trees_Initiative_Survey_0.csv")
-location_points <- read_csv("data/location_points_1.csv")
-location_polygons <- read_csv("data/location_polygons_2.csv")
-participant_organizations <- read_csv("data/participant_organizations_3.csv")
-species_planted <- read_csv("data/species_planted_4.csv")
-vendors <- read_csv("data/vendors_5.csv")
+# Define file paths
+survey_path <- "data/_25_Million_Trees_Initiative_Survey_0.csv"
+locations_pt_path <- "data/location_points_1.csv"
+locations_poly_path <- "data/location_polygons_2.csv"
+participants_path <- "data/participant_organizations_3.csv"
+species_path <- "data/species_planted_4.csv"
+vendors_path <- "data/vendors_5.csv"
 
-# Transform date stored as character or numeric vectors to POSIXct objects.
+# Check for expected files
+stopifnot(file.exists(survey_path))
+stopifnot(file.exists(locations_pt_path))
+stopifnot(file.exists(locations_poly_path))
+stopifnot(file.exists(participants_path))
+stopifnot(file.exists(species_path))
+stopifnot(file.exists(vendors_path))
+
+# Load survey and related datasets
+survey_data <- read_csv(survey_path)
+location_points <- read_csv(locations_pt_path)
+location_polygons <- read_csv(locations_poly_path)
+participant_organizations <- read_csv(participants_path)
+species_planted <- read_csv(species_path)
+vendors <- read_csv(vendors_path)
+
+# Convert character dates to POSIXct
 survey_data <- survey_data %>%
   mutate(CreationDate = mdy_hms(CreationDate))
 
-# Count the records to be excluded (Exclude Result == 1)
-excluded_count <- survey_data %>%
-  filter(`Exclude Result` == 1) %>%
-  nrow()
+# Count and filter records based on exclusion flag
+excluded_count <- survey_data %>% filter(`Exclude Result` == 1) %>% nrow()
+used_count <- survey_data %>% filter(`Exclude Result` == 0) %>% nrow()
 
-# Count the records that are used (Exclude Result == 0)
-used_count <- survey_data %>%
-  filter(`Exclude Result` == 0) %>%
-  nrow()
-
-# Ignore excluded data.
 survey_data <- survey_data %>%
   filter(`Exclude Result` == 0)
 
-# Join the data based on the ParentGlobalID, ensuring all rows from survey_data are retained
+# Join related datasets by GlobalID
 combined_data <- survey_data %>%
   left_join(location_points, by = c("GlobalID" = "ParentGlobalID")) %>%
   left_join(location_polygons, by = c("GlobalID" = "ParentGlobalID")) %>%