# Load necessary libraries
library(tidyverse)

# Read the six CSV exports from the data/ directory, one tibble per file.
# survey_data is the parent table; the other five tables are attached to it
# further down through their ParentGlobalID column.
survey_data <- read_csv(
  file = "data/_25_Million_Trees_Initiative_Survey_0.csv"
)
location_points <- read_csv(
  file = "data/location_points_1.csv"
)
location_polygons <- read_csv(
  file = "data/location_polygons_2.csv"
)
participant_organizations <- read_csv(
  file = "data/participant_organizations_3.csv"
)
species_planted <- read_csv(
  file = "data/species_planted_4.csv"
)
vendors <- read_csv(
  file = "data/vendors_5.csv"
)

# Print a compact column-by-column summary of every input table so the join
# keys (GlobalID on the survey, ParentGlobalID on the child tables) can be
# checked by eye. walk() is used because only the printed output matters.
walk(
  list(
    survey_data,
    location_points,
    location_polygons,
    participant_organizations,
    species_planted,
    vendors
  ),
  glimpse
)

# Attach every child table to the survey records by matching the survey's
# GlobalID against the child table's ParentGlobalID. The tables are folded in
# one after another with left joins, so every survey row is kept even when a
# child table has no matching rows (its columns are then NA).
# NOTE(review): if more than one child table holds several rows for the same
# survey, the successive joins return every combination of those rows, so the
# row count can multiply -- confirm this wide layout is what downstream
# analysis expects before counting or summing anything.
combined_data <- reduce(
  list(
    location_points,
    location_polygons,
    participant_organizations,
    species_planted,
    vendors
  ),
  function(joined, child) {
    left_join(joined, child, by = c("GlobalID" = "ParentGlobalID"))
  },
  .init = survey_data
)

# Inspect the merged result: row count, column names and column types.
glimpse(combined_data)