Compare commits
2 Commits
cda23941aa
...
cfd4ec0113
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cfd4ec0113 | ||
|
|
529fe505b6 |
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,2 +1,3 @@
|
|||||||
data/
|
data/
|
||||||
report.html
|
report.html
|
||||||
|
.Rproj.user
|
||||||
|
|||||||
376
report.Rmd
376
report.Rmd
@ -98,84 +98,103 @@ By applying these validation checks, the integrity and consistency of the data i
|
|||||||
|
|
||||||
## Submission Analysis {.tabset}
|
## Submission Analysis {.tabset}
|
||||||
|
|
||||||
### Submission Trend Analysis
|
### Submissions by Day of Week
|
||||||
|
The histogram presented below visualizes the number of survey submissions based on the day of the week. Each bar represents the frequency of submissions for a particular day, with the x-axis displaying the days (Monday through Sunday) and the y-axis showing the number of submissions for each corresponding day.
|
||||||
|
|
||||||
```{r submission-trend-stats, echo=FALSE, message=FALSE}
|
This chart helps identify any trends in survey participation, such as whether submissions are more frequent at the beginning or end of the week. This could be valuable for understanding user behavior and improving survey timing or outreach strategies.
|
||||||
## library(dplyr)
|
|
||||||
|
```{r submission-histogram-survey-submissions-day-of-week, echo=FALSE, message=FALSE, fig.height=6, fig.width=8}
|
||||||
|
library(dplyr)
|
||||||
|
library(ggplot2)
|
||||||
|
|
||||||
|
# Assuming 'survey_data' is your tibble
|
||||||
|
survey_data %>%
|
||||||
|
mutate(DayOfWeek = factor(weekdays(CreationDate),
|
||||||
|
levels = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"))) %>% # Set order of days
|
||||||
|
ggplot(aes(x = DayOfWeek)) +
|
||||||
|
geom_bar(stat = "count") + # Create the histogram (bar plot)
|
||||||
|
geom_text(aes(label = after_stat(count)), stat = "count", vjust = -0.25) + # Add labels above the bars
|
||||||
|
xlab("Day of the Week") +
|
||||||
|
ylab("Number of Submissions") +
|
||||||
|
theme(axis.text.x = element_text(angle = 45, hjust = 1)) # Angle labels for better readability
|
||||||
|
```
|
||||||
|
|
||||||
|
```{r func-plot_submission_trends, echo=FALSE}
|
||||||
|
# Load necessary libraries
|
||||||
|
library(tidyverse)
|
||||||
|
|
||||||
|
# Custom color palette
|
||||||
|
custom_palette <- c(
|
||||||
|
"#233f28", # primary
|
||||||
|
"#7e9084", # secondary
|
||||||
|
"#d9e1dd", # tertiary
|
||||||
|
"#face00" # accent
|
||||||
|
)
|
||||||
|
|
||||||
# Ensure CreationDate is in Date format
|
|
||||||
survey_data$CreationDate <- as.Date(survey_data$CreationDate)
|
survey_data$CreationDate <- as.Date(survey_data$CreationDate)
|
||||||
|
|
||||||
# Summarize the data to calculate the total number of submissions by CreationDate
|
# Define the function to plot survey submission trends
|
||||||
summary_data <- survey_data %>%
|
plot_submission_trends <- function(data, days_ago = 30) {
|
||||||
filter(`Exclude Result` == 0) %>%
|
|
||||||
group_by(CreationDate) %>%
|
# Calculate the start date (days_ago days before today)
|
||||||
summarise(total_submissions = n(), .groups = "drop")
|
start_date <- Sys.Date() - days_ago
|
||||||
|
|
||||||
# Number of days that have elapsed between the first and last submission date
|
# Filter the data based on the calculated start date (up to today)
|
||||||
date_range <- range(summary_data$CreationDate)
|
submission_trends <- data %>%
|
||||||
elapsed_days <- as.integer(difftime(date_range[2], date_range[1], units = "days"))
|
filter(CreationDate >= start_date) %>%
|
||||||
|
group_by(CreationDate) %>%
|
||||||
# Number of days with 0 submissions
|
summarize(submissions = n())
|
||||||
all_dates <- data.frame(CreationDate = seq.Date(date_range[1], date_range[2], by = "day"))
|
|
||||||
merged_data <- left_join(all_dates, summary_data, by = "CreationDate")
|
# Create the plot
|
||||||
days_with_0_submissions <- sum(is.na(merged_data$total_submissions))
|
ggplot(submission_trends, aes(x = CreationDate, y = submissions)) +
|
||||||
|
geom_line(color = custom_palette[1], linewidth = 1) + # Line color from palette
|
||||||
# Summary statistics based on the count of submissions
|
geom_point(color = custom_palette[1], size = 3, shape = 16) + # Points for visibility
|
||||||
submission_summary <- summary(merged_data$total_submissions, na.rm = TRUE)
|
labs(
|
||||||
|
title = "Survey Submission Trends by Date",
|
||||||
# Dates where submissions exceeded the 3rd quartile
|
subtitle = paste("Tracking submissions for the last", days_ago, "days"),
|
||||||
third_quartile <- quantile(merged_data$total_submissions, 0.75, na.rm = TRUE)
|
x = "Submission Date",
|
||||||
dates_above_3rd_quartile <- merged_data %>%
|
y = "Number of Submissions"
|
||||||
filter(total_submissions > third_quartile) %>%
|
) +
|
||||||
pull(CreationDate)
|
theme_minimal() +
|
||||||
|
theme(
|
||||||
|
plot.title = element_text(hjust = 0.5, face = "bold", size = 16),
|
||||||
|
plot.subtitle = element_text(hjust = 0.5, size = 12, color = "grey40"),
|
||||||
|
axis.title.x = element_text(color = "black", size = 12),
|
||||||
|
axis.title.y = element_text(color = "black", size = 12),
|
||||||
|
axis.text = element_text(color = "black", size = 10),
|
||||||
|
panel.grid.major = element_line(color = "grey90"),
|
||||||
|
panel.grid.minor = element_blank(),
|
||||||
|
axis.text.x = element_text(angle = 45, hjust = 1) # Rotate x-axis labels
|
||||||
|
) +
|
||||||
|
# Add a smoothed trend line (loess)
|
||||||
|
geom_smooth(method = "loess", color = custom_palette[4], linewidth = 1, linetype = "dashed")
|
||||||
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
The survey has been active for **`r elapsed_days`** days. During this period, **`r days_with_0_submissions`** days had no submissions.
|
### 30 Day Trend
|
||||||
|
The plot below visualizes the survey submission trends for the past 30 days. It shows the number of submissions made each day, highlighting variations over the last month. This type of plot is helpful for understanding trends in user activity, such as identifying peak submission days, periods of low activity, or gradual changes over time.
|
||||||
|
|
||||||
The following visualization illustrates the trend in the total number of submissions throughout the survey period, providing insights into any patterns or changes in submission activity.
|
The data used for this plot is filtered to include only submissions made in the last 30 days, with the submission count for each date represented by both the line and the points on the graph. A smoothed trend line (dashed) has been added to help visualize the overall submission pattern over this period.
|
||||||
|
|
||||||
```{r submission-trend-plot, echo=FALSE, message=FALSE, fig.height=6, fig.width=8}
|
|
||||||
#library(ggplot2)
|
|
||||||
|
|
||||||
# Plot Submission Trend
|
|
||||||
ggplot(summary_data, aes(x = CreationDate, y = total_submissions)) +
|
|
||||||
geom_line(color = "#233f28", linewidth = 1) +
|
|
||||||
geom_point(color = "#7e9084", size = 3) +
|
|
||||||
geom_smooth(method = "loess", color = "#face00", linewidth = 1, linetype = "dashed") +
|
|
||||||
labs(
|
|
||||||
title = "Total Number of Submissions by Date",
|
|
||||||
x = "Submission Date",
|
|
||||||
y = "Total Number of Submissions"
|
|
||||||
) +
|
|
||||||
theme_minimal(base_size = 14) +
|
|
||||||
theme(
|
|
||||||
plot.title = element_text(size = 16, face = "bold", color = "#233f28"),
|
|
||||||
axis.title = element_text(size = 12, color = "#233f28"),
|
|
||||||
axis.text = element_text(size = 10, color = "#233f28"),
|
|
||||||
plot.margin = margin(10, 10, 10, 10),
|
|
||||||
panel.grid.major = element_line(color = "#d9e1dd", linewidth = 0.3),
|
|
||||||
panel.background = element_rect(fill = "#d9e1dd"),
|
|
||||||
axis.text.x = element_text(angle = 45, hjust = 1)
|
|
||||||
) +
|
|
||||||
scale_x_date(date_labels = "%b %Y", date_breaks = "1 months")
|
|
||||||
|
|
||||||
|
```{r plot-submission-trends-30d, echo=FALSE, message=FALSE, fig.height=6, fig.width=8}
|
||||||
|
plot_submission_trends(survey_data, days_ago = 30)
|
||||||
```
|
```
|
||||||
|
|
||||||
### Survey Response Rates by Field
|
### 90 Day Trend
|
||||||
The table below shows the response rates for a selection of optional fields within the survey. Each field represents a different aspect of the survey, and the response rate reflects the percentage of respondents who provided valid answers for each field.
|
The plot below visualizes the survey submission trends for the past 90 days. It shows the number of submissions made each day, highlighting variations over the last three months. This type of plot is helpful for understanding trends in user activity, such as identifying peak submission days, periods of low activity, or gradual changes over time.
|
||||||
|
|
||||||
- **Planter Contact Email**: The percentage of respondents who provided their email address.
|
The data used for this plot is filtered to include only submissions made in the last 90 days, with the submission count for each date represented by both the line and the points on the graph. A smoothed trend line (dashed) has been added to help visualize the overall submission pattern over this period.
|
||||||
- **Funding Source**: The percentage of respondents who identified their funding source.
|
|
||||||
- **Land Ownership**: The percentage of respondents who indicated their land ownership status.
|
|
||||||
- **Tree Size Planted**: The percentage of respondents who specified the size of trees they planted.
|
|
||||||
- **Source of Trees**: The percentage of respondents who reported the source of the trees they planted.
|
|
||||||
- **Species Planted**: The percentage of respondents who provided the species of tree(s) they planted.
|
|
||||||
|
|
||||||
This breakdown helps identify which survey fields received higher levels of engagement, and which may require further clarification or encouragement to improve response rates.
|
```{r plot-submission-trends-90d, echo=FALSE, message=FALSE}
|
||||||
|
plot_submission_trends(survey_data, days_ago = 90)
|
||||||
|
```
|
||||||
|
|
||||||
```{r response-rate, echo=FALSE, message=FALSE}
|
### Response Rates to Top-Level Optional Questions
|
||||||
|
The table below summarizes the response rates for optional key top-level questions in the survey. These are the questions that all participants are asked, with some triggering additional follow-up questions based on responses. The response rate is the percentage of participants who provided an answer for each question.
|
||||||
|
|
||||||
|
The "Total Number of Species Planted" question has special handling—only responses greater than 0 are considered valid, whereas for other questions, any non-NA value counts as a response.
|
||||||
|
|
||||||
|
```{r optonal-top-level-question-response-rate-table, echo=FALSE, message=FALSE, fig.height=6, fig.width=8}
|
||||||
# List of fields to check for response rates, with special handling for 'Total Number of Species Planted'
|
# List of fields to check for response rates, with special handling for 'Total Number of Species Planted'
|
||||||
fields <- c("Planter Contact Email", "Funding Source", "Land Ownership",
|
fields <- c("Planter Contact Email", "Funding Source", "Land Ownership",
|
||||||
"Tree Size Planted", "Source of Trees", "Total Number of Species Planted")
|
"Tree Size Planted", "Source of Trees", "Total Number of Species Planted")
|
||||||
@ -197,27 +216,47 @@ response_rates_rounded <- round(response_rates, 2)
|
|||||||
# Sort the response rates in descending order (highest to lowest)
|
# Sort the response rates in descending order (highest to lowest)
|
||||||
sorted_response_rates <- sort(response_rates_rounded, decreasing = TRUE)
|
sorted_response_rates <- sort(response_rates_rounded, decreasing = TRUE)
|
||||||
|
|
||||||
# Print the sorted, rounded response rates
|
# Create a clean data frame with the field names and their response rates
|
||||||
sorted_response_rates
|
response_rate_table <- data.frame(
|
||||||
|
"Field" = names(sorted_response_rates),
|
||||||
|
"Response Rate (%)" = sorted_response_rates,
|
||||||
|
stringsAsFactors = FALSE # Ensure the "Field" column is treated as character, not factor
|
||||||
|
)
|
||||||
|
|
||||||
|
# Remove the row names (the extra column that appears as a result of conversion)
|
||||||
|
rownames(response_rate_table) <- NULL
|
||||||
|
|
||||||
|
# Fix column names to ensure proper headers
|
||||||
|
colnames(response_rate_table) <- c("Field", "Response Rate (%)")
|
||||||
|
|
||||||
|
# Display the table using kable for better formatting
|
||||||
|
library(knitr)
|
||||||
|
kable(response_rate_table, caption = "Response Rates for Key Survey Questions", align = "l")
|
||||||
```
|
```
|
||||||
|
|
||||||
## Participant Type Analysis {.tabset}
|
The following provides additional context for each survey question/field, detailing what the percentage represents.
|
||||||
|
|
||||||
### Number of Submissions
|
- **Planter Contact Email**: The percentage of respondents who provided their email address.
|
||||||
The first visualization shows the distribution of the number of tree planting surveys based on the participant type. This breakdown helps highlight which groups are contributing most to the tree planting initiative.
|
- **Funding Source**: The percentage of respondents who identified their funding source.
|
||||||
|
- **Land Ownership**: The percentage of respondents who indicated their land ownership status.
|
||||||
|
- **Tree Size Planted**: The percentage of respondents who specified the size of trees they planted.
|
||||||
|
- **Source of Trees**: The percentage of respondents who reported the source of the trees they planted.
|
||||||
|
- **Total Number of Species Planted**: The percentage of respondents who reported planting at least one species of tree (only values greater than 0 count as a response).
|
||||||
|
|
||||||
```{r participant-type-surveys, echo=FALSE, message=FALSE}
|
## Participant Analysis {.tabset}
|
||||||
#library(ggplot2)
|
The following section contains an analysis of tree planting by participant type.
|
||||||
#library(dplyr)
|
|
||||||
|
|
||||||
|
### Submissions
|
||||||
|
The following plot shows the distribution of survey submissions based on participant type. This breakdown highlights the contributions of each participant group to the tree planting initiative.
|
||||||
|
|
||||||
|
```{r participant-type-surveys, echo=FALSE, message=FALSE, fig.height=6, fig.width=8}
|
||||||
ggplot(survey_data, aes(x = `Who Planted The Tree(s)?`)) +
|
ggplot(survey_data, aes(x = `Who Planted The Tree(s)?`)) +
|
||||||
geom_bar(fill = "#233f28", color = "#7e9084") +
|
geom_bar(fill = "#233f28", color = "#7e9084") +
|
||||||
geom_text(stat = "count", aes(label = scales::comma(after_stat(count))),
|
geom_text(stat = "count", aes(label = scales::comma(after_stat(count))),
|
||||||
position = position_stack(vjust = 0.5), # Places text in the middle of the bars
|
position = position_stack(vjust = 0.5), # Places text in the middle of the bars
|
||||||
color = "#ffffff", size = 5) + # Adjust label size
|
color = "#face00", size = 5) + # Use accent color for text labels
|
||||||
labs(
|
labs(
|
||||||
title = "Number of Tree Planting Submissions by Participant Type",
|
title = "Distribution of Tree Planting Submissions by Participant Type",
|
||||||
x = "Participant Type",
|
x = "Participant Type",
|
||||||
y = "Number of Submissions"
|
y = "Number of Submissions"
|
||||||
) +
|
) +
|
||||||
@ -241,28 +280,22 @@ ggplot(survey_data, aes(x = `Who Planted The Tree(s)?`)) +
|
|||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Total Trees Planted
|
### Trees Planted
|
||||||
This second plot provides a breakdown of the total number of trees planted by participant type. This visualization helps to assess the contribution of each participant group to the overall impact of the tree planting program.
|
This plot visualizes the total number of trees planted by each participant type, helping to evaluate the overall impact of different groups in the tree planting program.
|
||||||
|
|
||||||
```{r participant-type-planted, echo=FALSE, message=FALSE}
|
```{r participant-type-planted, echo=FALSE, message=FALSE, fig.height=6, fig.width=8}
|
||||||
library(ggplot2)
|
|
||||||
library(dplyr)
|
|
||||||
|
|
||||||
summary_data <- survey_data %>%
|
summary_data <- survey_data %>%
|
||||||
group_by(`Who Planted The Tree(s)?`) %>%
|
group_by(`Who Planted The Tree(s)?`) %>%
|
||||||
summarise(total_trees = sum(`Number of Trees Planted`, na.rm = TRUE))
|
summarise(total_trees = sum(`Number of Trees Planted`, na.rm = TRUE))
|
||||||
|
|
||||||
library(ggplot2)
|
|
||||||
library(dplyr)
|
|
||||||
|
|
||||||
# Assuming 'summary_data' is already defined
|
|
||||||
ggplot(summary_data, aes(x = `Who Planted The Tree(s)?`, y = total_trees)) +
|
ggplot(summary_data, aes(x = `Who Planted The Tree(s)?`, y = total_trees)) +
|
||||||
geom_bar(stat = "identity", fill = "#233f28", color = "#7e9084") +
|
geom_bar(stat = "identity", fill = "#233f28", color = "#7e9084") +
|
||||||
geom_text(aes(label = scales::comma(total_trees)),
|
geom_text(aes(label = scales::comma(total_trees)),
|
||||||
position = position_stack(vjust = 0.5), # Places text in the middle of the bars
|
position = position_stack(vjust = 0.5), # Places text in the middle of the bars
|
||||||
color = "#ffffff", size = 5) + # Accent color for text labels
|
color = "#face00", size = 5) + # Accent color for text labels
|
||||||
labs(
|
labs(
|
||||||
title = "Total Number of Trees Planted by Participant Type",
|
title = "Contribution of Each Participant Type to Total Trees Planted",
|
||||||
x = "Participant Type",
|
x = "Participant Type",
|
||||||
y = "Total Number of Trees Planted"
|
y = "Total Number of Trees Planted"
|
||||||
) +
|
) +
|
||||||
@ -285,7 +318,9 @@ ggplot(summary_data, aes(x = `Who Planted The Tree(s)?`, y = total_trees)) +
|
|||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
```{r participant-type-table, echo=FALSE, message=FALSE}
|
The following table provides a breakdown of the total number of trees planted by participant type. It shows both the total number of trees planted by each group and their proportional contribution to the overall planting efforts. This information helps assess which participant types have contributed the most to the tree planting program.
|
||||||
|
|
||||||
|
```{r participant-type-table, echo=FALSE, message=FALSE, fig.height=6, fig.width=8}
|
||||||
# Summarize the data to calculate the total number of trees planted by participant type
|
# Summarize the data to calculate the total number of trees planted by participant type
|
||||||
summary_data <- survey_data %>%
|
summary_data <- survey_data %>%
|
||||||
group_by(`Who Planted The Tree(s)?`) %>%
|
group_by(`Who Planted The Tree(s)?`) %>%
|
||||||
@ -312,127 +347,74 @@ summary_data_formatted <- summary_data %>%
|
|||||||
percentage = paste0(round(percentage, 1), "%") # Round percentage and append '%'
|
percentage = paste0(round(percentage, 1), "%") # Round percentage and append '%'
|
||||||
)
|
)
|
||||||
|
|
||||||
# Print the table
|
|
||||||
summary_data_formatted %>%
|
summary_data_formatted %>%
|
||||||
knitr::kable(col.names = c("Participant Type", "Total Trees Planted", "Percentage of Total Trees"),
|
knitr::kable(col.names = c("Participant Type", "Total Trees Planted", "Percentage of Total Trees"),
|
||||||
caption = "Total Number of Trees Planted by Participant Type and their Proportional Contribution") %>%
|
caption = "Breakdown of Total Trees Planted by Participant Type and Their Contribution to the Overall Tree Planting Effort",
|
||||||
kableExtra::kable_styling(full_width = F, position = "center", bootstrap_options = c("striped", "hover"))
|
align = c("l", "c", "c")) %>% # Align Participant Type left, and others center
|
||||||
```
|
kableExtra::kable_styling(
|
||||||
|
full_width = F,
|
||||||
|
position = "center",
|
||||||
## Region Overview
|
bootstrap_options = c("striped", "hover"),
|
||||||
This section provides an overview of regional involvement in, and response to, the tree planting survey.
|
font_size = 14,
|
||||||
|
fixed_thead = TRUE
|
||||||
In the table below, we aggregate plantings by Region. The results are provided in descending order of Total Trees Planted.
|
|
||||||
```{r region-summary, echo=FALSE, warning=FALSE, message=FALSE}
|
|
||||||
# Summarize the data by Region
|
|
||||||
region_summary_data <- survey_data %>%
|
|
||||||
group_by(Region) %>%
|
|
||||||
summarise(
|
|
||||||
total_records = n(), # Count the number of records in each region
|
|
||||||
total_trees_planted = sum(`Number of Trees Planted`, na.rm = TRUE), # Sum of trees planted in each region
|
|
||||||
mean_trees_planted = mean(`Number of Trees Planted`, na.rm = TRUE), # Mean number of trees planted
|
|
||||||
median_trees_planted = median(`Number of Trees Planted`, na.rm = TRUE) # Median number of trees planted
|
|
||||||
) %>%
|
) %>%
|
||||||
arrange(desc(total_trees_planted)) # Sort by total trees planted in descending order
|
kableExtra::column_spec(1, width = "20em", bold = TRUE) %>% # Participant Type column bold and wider
|
||||||
|
kableExtra::column_spec(2, width = "12em", color = "black") %>% # Total Trees column
|
||||||
# Format the table to display the total number of records and trees planted
|
kableExtra::column_spec(3, width = "12em", color = "black") %>% # Percentage column
|
||||||
region_summary_data_formatted <- region_summary_data %>%
|
kableExtra::add_footnote("Total number of trees and percentage represent each participant's contribution to the overall tree planting effort.")
|
||||||
mutate(
|
|
||||||
total_trees_planted = scales::comma(total_trees_planted), # Add commas to the total number of trees
|
|
||||||
total_records = scales::comma(total_records), # Add commas to the total number of records
|
|
||||||
mean_trees_planted = round(mean_trees_planted, 1), # Round mean for readability
|
|
||||||
median_trees_planted = round(median_trees_planted, 1) # Round median for readability
|
|
||||||
)
|
|
||||||
|
|
||||||
# Print the summary table
|
|
||||||
region_summary_data_formatted %>%
|
|
||||||
knitr::kable(col.names = c("Region", "Total Submissions", "Total Trees Planted", "Mean", "Median"),
|
|
||||||
caption = "Total Records, Trees Planted, Mean, and Median by Region (Sorted by Trees Planted)") %>%
|
|
||||||
kableExtra::kable_styling(full_width = F, position = "center", bootstrap_options = c("striped", "hover"))
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
## County Overview
|
|
||||||
This section provides an overview of the counties involved in, and their response to, the tree planting survey.
|
|
||||||
|
|
||||||
In the table below, we aggregate plantings by County. The results are provided in descending order of Total Trees Planted.
|
|
||||||
```{r county-summary, echo=FALSE, warning=FALSE, message=FALSE}
|
|
||||||
# Summarize the data by County
|
|
||||||
county_summary_data <- survey_data %>%
|
|
||||||
group_by(County) %>%
|
|
||||||
summarise(
|
|
||||||
total_records = n(), # Count the number of records in each county
|
|
||||||
total_trees_planted = sum(`Number of Trees Planted`, na.rm = TRUE), # Sum of trees planted in each region
|
|
||||||
mean_trees_planted = mean(`Number of Trees Planted`, na.rm = TRUE), # Mean number of trees planted
|
|
||||||
median_trees_planted = median(`Number of Trees Planted`, na.rm = TRUE) # Median number of trees planted
|
|
||||||
) %>%
|
|
||||||
arrange(desc(total_trees_planted)) # Sort by total trees planted in descending order
|
|
||||||
|
|
||||||
# Format the table to display the total number of records and trees planted
|
|
||||||
county_summary_data_formatted <- county_summary_data %>%
|
|
||||||
mutate(
|
|
||||||
total_trees_planted = scales::comma(total_trees_planted), # Add commas to the total number of trees
|
|
||||||
total_records = scales::comma(total_records), # Add commas to the total number of records
|
|
||||||
mean_trees_planted = round(mean_trees_planted, 1), # Round mean for readability
|
|
||||||
median_trees_planted = round(median_trees_planted, 1) # Round median for readability
|
|
||||||
)
|
|
||||||
|
|
||||||
# Print the summary table
|
|
||||||
county_summary_data_formatted %>%
|
|
||||||
knitr::kable(col.names = c("County", "Total Submissions", "Total Trees Planted", "Mean", "Median"),
|
|
||||||
caption = "Total Records, Trees Planted, Mean, and Median by County (Sorted by Trees Planted)") %>%
|
|
||||||
kableExtra::kable_styling(full_width = F, position = "center", bootstrap_options = c("striped", "hover"))
|
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
## Species Overview
|
## Location Analysis {.tabset}
|
||||||
The following section contains details on species plantings. These results indicate the number of occurrences where the tree species was planted. They are not necessarily the number of those trees planted, but can be used to indicate popularity.
|
|
||||||
|
|
||||||
```{r species-detail, echo=FALSE, message=FALSE}
|
```{r func-create_summary_table, echo=FALSE}
|
||||||
#library(tidyverse)
|
create_summary_table <- function(data, field) {
|
||||||
# Count unique values in 'Generic.Species.of.Tree' and 'Precise.Species.of.Tree', handling NA and sorting
|
# Summarize the data based on the field provided
|
||||||
generic_species_count <- species_data %>%
|
summary_data <- data %>%
|
||||||
count(`Generic.Species.of.Tree`) %>%
|
group_by(!!sym(field)) %>% # Dynamically use the provided field name
|
||||||
mutate(
|
summarise(
|
||||||
`Generic.Species.of.Tree` = if_else(is.na(`Generic.Species.of.Tree`), "Null Response", `Generic.Species.of.Tree`),
|
submissions = n(), # Count of submissions
|
||||||
`Generic.Species.of.Tree` = str_replace_all(`Generic.Species.of.Tree`, "_", " "), # Replace underscores with spaces
|
total_trees = sum(`Number of Trees Planted`, na.rm = TRUE) # Sum of trees planted
|
||||||
`Generic.Species.of.Tree` = str_to_title(`Generic.Species.of.Tree`) # Convert to Title Case
|
) %>%
|
||||||
) %>%
|
mutate(
|
||||||
arrange(desc(n)) # Sort by count in descending order
|
submissions_percentage = submissions / sum(submissions) * 100, # Proportion of submissions
|
||||||
|
trees_percentage = total_trees / sum(total_trees) * 100 # Proportion of trees planted
|
||||||
|
)
|
||||||
|
|
||||||
precise_species_count <- species_data %>%
|
# Format the table to display commas for the totals and round percentages
|
||||||
count(`Precise.Species.of.Tree`) %>%
|
summary_data_formatted <- summary_data %>%
|
||||||
mutate(
|
mutate(
|
||||||
`Precise.Species.of.Tree` = if_else(is.na(`Precise.Species.of.Tree`), "Null Response", `Precise.Species.of.Tree`),
|
submissions = scales::comma(submissions),
|
||||||
`Precise.Species.of.Tree` = str_replace_all(`Precise.Species.of.Tree`, "_", " "), # Replace underscores with spaces
|
total_trees = scales::comma(total_trees),
|
||||||
`Precise.Species.of.Tree` = str_to_title(`Precise.Species.of.Tree`) # Convert to Title Case
|
submissions_percentage = paste0(round(submissions_percentage, 1), "%"),
|
||||||
) %>%
|
trees_percentage = paste0(round(trees_percentage, 1), "%")
|
||||||
arrange(desc(n)) # Sort by count in descending order
|
)
|
||||||
|
|
||||||
# Print the results
|
# Create and style the table
|
||||||
print(generic_species_count)
|
summary_data_formatted %>%
|
||||||
print(precise_species_count)
|
knitr::kable(col.names = c(field, "Number of Submissions", "Number of Trees Planted", "Proportion of Submissions (%)", "Proportion of Trees Planted (%)"),
|
||||||
|
caption = paste("Summary of Submissions and Trees Planted by", field),
|
||||||
|
align = c("l", "c", "c", "c", "c")) %>%
|
||||||
|
kableExtra::kable_styling(
|
||||||
|
full_width = F,
|
||||||
|
position = "center",
|
||||||
|
bootstrap_options = c("striped", "hover"),
|
||||||
|
font_size = 14
|
||||||
|
) %>%
|
||||||
|
kableExtra::column_spec(1, width = "20em", bold = TRUE) %>% # First column bold and wider
|
||||||
|
kableExtra::column_spec(2, width = "12em") %>% # Total Trees column
|
||||||
|
kableExtra::column_spec(3, width = "12em") %>% # Percentage column
|
||||||
|
kableExtra::add_footnote("The proportions represent the percentage of submissions and trees planted for each category relative to the overall dataset.")
|
||||||
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
## Tree Count
|
### By Region
|
||||||
In this section, we present summary statistics for the number of trees planted by all participants in various tree planting surveys.
|
```{r create-summary-table-region, echo=FALSE, message=FALSE}
|
||||||
|
create_summary_table(survey_data, "Region")
|
||||||
```{r summary-stats, echo=FALSE, warning=FALSE, message=FALSE}
|
|
||||||
# Calculate summary statistics
|
|
||||||
summary_stats <- summary(survey_data$`Number of Trees Planted`, na.rm = TRUE)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Below is a summary of the `Number of Trees Planted` across participants:
|
### By County
|
||||||
|
```{r create-summary-table-county, echo=FALSE, message=FALSE}
|
||||||
| Statistic | Value |
|
create_summary_table(survey_data, "County")
|
||||||
|-------------|-------------|
|
```
|
||||||
| Min | `r summary_stats["Min"]` |
|
|
||||||
| 1st Qu. | `r summary_stats["1st Qu."]` |
|
|
||||||
| Median | `r summary_stats["Median"]` |
|
|
||||||
| Mean | `r summary_stats["Mean"]` |
|
|
||||||
| 3rd Qu. | `r summary_stats["3rd Qu."]` |
|
|
||||||
| Max | `r summary_stats["Max"]` |
|
|
||||||
|
|
||||||
The summary statistics for the number of trees planted provide insight into the distribution of trees planted by all participants in the tree planting surveys. While the median value gives us a sense of the "typical" number of trees planted, the mean might be skewed by a few participants planting a very large number of trees.
|
|
||||||
Loading…
Reference in New Issue
Block a user