refactor: simplify setup chunk by loading tidyverse and organizing code.
This commit is contained in:
parent
62fbbce3f6
commit
75e0c840a4
63
report.Rmd
63
report.Rmd
@@ -23,50 +23,59 @@ geometry: margin=1in
|
|||||||
---
|
---
|
||||||
|
|
||||||
```{r setup, include=FALSE}
|
```{r setup, include=FALSE}
|
||||||
|
# Global setup
|
||||||
knitr::opts_chunk$set(
|
knitr::opts_chunk$set(
|
||||||
echo = TRUE,
|
echo = TRUE,
|
||||||
message = FALSE,
|
message = FALSE,
|
||||||
warning = FALSE
|
warning = FALSE
|
||||||
)
|
)
|
||||||
# Load necessary libraries
|
|
||||||
#library(dplyr)
|
set.seed(123) # For reproducibility
|
||||||
#library(ggplot2)
|
|
||||||
library(knitr)
|
# Load tidyverse and additional necessary libraries
|
||||||
library(lubridate)
|
|
||||||
library(RColorBrewer)
|
|
||||||
library(scales)
|
|
||||||
library(sf)
|
|
||||||
library(tidyverse)
|
library(tidyverse)
|
||||||
|
library(sf)
|
||||||
library(tigris)
|
library(tigris)
|
||||||
|
library(scales)
|
||||||
|
library(RColorBrewer)
|
||||||
library(viridis)
|
library(viridis)
|
||||||
|
|
||||||
# Load survey data files from CSV as tibbles.
|
# Define file paths
|
||||||
survey_data <- read_csv("data/_25_Million_Trees_Initiative_Survey_0.csv")
|
survey_path <- "data/_25_Million_Trees_Initiative_Survey_0.csv"
|
||||||
location_points <- read_csv("data/location_points_1.csv")
|
locations_pt_path <- "data/location_points_1.csv"
|
||||||
location_polygons <- read_csv("data/location_polygons_2.csv")
|
locations_poly_path <- "data/location_polygons_2.csv"
|
||||||
participant_organizations <- read_csv("data/participant_organizations_3.csv")
|
participants_path <- "data/participant_organizations_3.csv"
|
||||||
species_planted <- read_csv("data/species_planted_4.csv")
|
species_path <- "data/species_planted_4.csv"
|
||||||
vendors <- read_csv("data/vendors_5.csv")
|
vendors_path <- "data/vendors_5.csv"
|
||||||
|
|
||||||
# Transform date stored as character or numeric vectors to POSIXct objects.
|
# Check for expected files
|
||||||
|
stopifnot(file.exists(survey_path))
|
||||||
|
stopifnot(file.exists(locations_pt_path))
|
||||||
|
stopifnot(file.exists(locations_poly_path))
|
||||||
|
stopifnot(file.exists(participants_path))
|
||||||
|
stopifnot(file.exists(species_path))
|
||||||
|
stopifnot(file.exists(vendors_path))
|
||||||
|
|
||||||
|
# Load survey and related datasets
|
||||||
|
survey_data <- read_csv(survey_path)
|
||||||
|
location_points <- read_csv(locations_pt_path)
|
||||||
|
location_polygons <- read_csv(locations_poly_path)
|
||||||
|
participant_organizations <- read_csv(participants_path)
|
||||||
|
species_planted <- read_csv(species_path)
|
||||||
|
vendors <- read_csv(vendors_path)
|
||||||
|
|
||||||
|
# Convert character dates to POSIXct
|
||||||
survey_data <- survey_data %>%
|
survey_data <- survey_data %>%
|
||||||
mutate(CreationDate = mdy_hms(CreationDate))
|
mutate(CreationDate = mdy_hms(CreationDate))
|
||||||
|
|
||||||
# Count the records to be excluded (Exclude Result == 1)
|
# Count and filter records based on exclusion flag
|
||||||
excluded_count <- survey_data %>%
|
excluded_count <- survey_data %>% filter(`Exclude Result` == 1) %>% nrow()
|
||||||
filter(`Exclude Result` == 1) %>%
|
used_count <- survey_data %>% filter(`Exclude Result` == 0) %>% nrow()
|
||||||
nrow()
|
|
||||||
|
|
||||||
# Count the records that are used (Exclude Result == 0)
|
|
||||||
used_count <- survey_data %>%
|
|
||||||
filter(`Exclude Result` == 0) %>%
|
|
||||||
nrow()
|
|
||||||
|
|
||||||
# Ignore excluded data.
|
|
||||||
survey_data <- survey_data %>%
|
survey_data <- survey_data %>%
|
||||||
filter(`Exclude Result` == 0)
|
filter(`Exclude Result` == 0)
|
||||||
|
|
||||||
# Join the data based on the ParentGlobalID, ensuring all rows from survey_data are retained
|
# Join related datasets by GlobalID
|
||||||
combined_data <- survey_data %>%
|
combined_data <- survey_data %>%
|
||||||
left_join(location_points, by = c("GlobalID" = "ParentGlobalID")) %>%
|
left_join(location_points, by = c("GlobalID" = "ParentGlobalID")) %>%
|
||||||
left_join(location_polygons, by = c("GlobalID" = "ParentGlobalID")) %>%
|
left_join(location_polygons, by = c("GlobalID" = "ParentGlobalID")) %>%
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user