docs(analysis): improve clarity of submission trends and response rate descriptions.
This commit is contained in:
parent
95bba5825d
commit
507cb05265
53
report.Rmd
53
report.Rmd
@ -69,8 +69,12 @@ survey_data <- survey_data %>%
|
|||||||
mutate(CreationDate = mdy_hms(CreationDate))
|
mutate(CreationDate = mdy_hms(CreationDate))
|
||||||
|
|
||||||
# Count and filter records based on exclusion flag
|
# Count and filter records based on exclusion flag
|
||||||
|
start_date <- format(min(survey_data$CreationDate, na.rm = TRUE), "%B %d, %Y")
|
||||||
|
end_date <- format(max(survey_data$CreationDate, na.rm = TRUE), "%B %d, %Y")
|
||||||
excluded_count <- survey_data %>% filter(`Exclude Result` == 1) %>% nrow()
|
excluded_count <- survey_data %>% filter(`Exclude Result` == 1) %>% nrow()
|
||||||
used_count <- survey_data %>% filter(`Exclude Result` == 0) %>% nrow()
|
used_count <- survey_data %>% filter(`Exclude Result` == 0) %>% nrow()
|
||||||
|
total_records <- excluded_count + used_count
|
||||||
|
|
||||||
|
|
||||||
survey_data <- survey_data %>%
|
survey_data <- survey_data %>%
|
||||||
filter(`Exclude Result` == 0)
|
filter(`Exclude Result` == 0)
|
||||||
@ -112,7 +116,7 @@ By understanding planting behavior statewide, DEC can better allocate resources
|
|||||||
|
|
||||||
## Survey Period & Exclusions
|
## Survey Period & Exclusions
|
||||||
|
|
||||||
This analysis covers submissions from **`r format(min(survey_data$CreationDate, na.rm = TRUE), "%B %d, %Y")`** to **`r format(max(survey_data$CreationDate, na.rm = TRUE), "%B %d, %Y")`**, totaling **`r excluded_count + used_count`** records. Of these, **`r used_count`** records were deemed valid and included in the analysis.
|
This analysis covers submissions from **`r start_date`** to **`r end_date`**, totaling **`r total_records`** records. Of these, **`r used_count`** records were deemed valid and included in the analysis.
|
||||||
|
|
||||||
### Excluded Records
|
### Excluded Records
|
||||||
|
|
||||||
@ -140,11 +144,14 @@ To ensure data reliability, multiple validation checks were applied:
|
|||||||
|
|
||||||
These validation processes enhance the accuracy and interpretability of the data, ensuring the results reflect genuine community contributions to the initiative.
|
These validation processes enhance the accuracy and interpretability of the data, ensuring the results reflect genuine community contributions to the initiative.
|
||||||
|
|
||||||
|
|
||||||
# Submission Analysis {.tabset}
|
# Submission Analysis {.tabset}
|
||||||
|
|
||||||
[Back to Top](#)
|
[Back to Top](#)
|
||||||
|
|
||||||
|
## Day of Week
|
||||||
|
|
||||||
|
The chart below shows the distribution of survey submissions by day of the week. It reveals which days respondents were most likely to submit entries, offering insight into user behavior that could inform outreach timing.
|
||||||
|
|
||||||
```{r func-create_histogram, echo=TRUE, message=FALSE}
|
```{r func-create_histogram, echo=TRUE, message=FALSE}
|
||||||
create_histogram <- function(data, field, x_labels = NULL, color_palette = c("#154973", "#457aa5", "#eff6fb", "#face00"),
|
create_histogram <- function(data, field, x_labels = NULL, color_palette = c("#154973", "#457aa5", "#eff6fb", "#face00"),
|
||||||
title = NULL, x_title = NULL, y_title = "Count",
|
title = NULL, x_title = NULL, y_title = "Count",
|
||||||
@ -189,12 +196,6 @@ create_histogram <- function(data, field, x_labels = NULL, color_palette = c("#1
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
## Day of Week
|
|
||||||
|
|
||||||
The histogram presented below visualizes the number of survey submissions based on the day of the week. Each bar represents the frequency of submissions for a particular day, with the x-axis displaying the days (Monday through Sunday) and the y-axis showing the number of submissions for each corresponding day.
|
|
||||||
|
|
||||||
This chart helps identify any trends in survey participation, such as whether submissions are more frequent at the beginning or end of the week. This could be valuable for understanding user behavior and improving survey timing or outreach strategies.
|
|
||||||
|
|
||||||
```{r create-histogram-day-of-week, echo=TRUE, message=FALSE, fig.height=6, fig.width=8}
|
```{r create-histogram-day-of-week, echo=TRUE, message=FALSE, fig.height=6, fig.width=8}
|
||||||
survey_data %>%
|
survey_data %>%
|
||||||
mutate(DayOfWeek = factor(weekdays(CreationDate),
|
mutate(DayOfWeek = factor(weekdays(CreationDate),
|
||||||
@ -207,6 +208,10 @@ create_histogram(
|
|||||||
color_palette = c("#233f2b", "#7e9084", "#d9e1dd", "#face00"))
|
color_palette = c("#233f2b", "#7e9084", "#d9e1dd", "#face00"))
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## 30 Day Trend
|
||||||
|
|
||||||
|
The following plot displays the number of survey submissions recorded each day over the past 30 days. It highlights short-term trends in participation and identifies periods of high or low activity. A smoothed trend line (dashed) has been added to help visualize patterns.
|
||||||
|
|
||||||
```{r func-plot_submission_trends, echo=TRUE}
|
```{r func-plot_submission_trends, echo=TRUE}
|
||||||
plot_submission_trends <- function(data, days_ago = 30, color_palette = c("#154973", "#457aa5", "#eff6fb", "#face00"),
|
plot_submission_trends <- function(data, days_ago = 30, color_palette = c("#154973", "#457aa5", "#eff6fb", "#face00"),
|
||||||
title = NULL, subtitle = NULL, x_title = "Submission Date", y_title = "Number of Submissions") {
|
title = NULL, subtitle = NULL, x_title = "Submission Date", y_title = "Number of Submissions") {
|
||||||
@ -252,17 +257,11 @@ plot_submission_trends <- function(data, days_ago = 30, color_palette = c("#1549
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
## 30 Day Trend
|
|
||||||
|
|
||||||
The plot below visualizes the survey submission trends for the past 30 days. It shows the number of submissions made each day, highlighting variations over the last month. This type of plot is helpful for understanding trends in user activity, such as identifying peak submission days, periods of low activity, or gradual changes over time.
|
|
||||||
|
|
||||||
The data used for this plot is filtered to include only submissions made in the last 30 days, with the submission count for each date represented by both the line and the points on the graph. A smoothed trend line (dashed) has been added to help visualize the overall submission pattern over this period.
|
|
||||||
|
|
||||||
```{r plot-submission-trends-30d, echo=TRUE, message=FALSE, fig.height=6, fig.width=8}
|
```{r plot-submission-trends-30d, echo=TRUE, message=FALSE, fig.height=6, fig.width=8}
|
||||||
survey_data$CreationDate <- as.Date(survey_data$CreationDate)
|
survey_data$CreationDate <- as.Date(survey_data$CreationDate)
|
||||||
plot_submission_trends(survey_data,
|
plot_submission_trends(survey_data,
|
||||||
days_ago = 30,
|
days_ago = 30,
|
||||||
color_palette <- c(
|
color_palette = c(
|
||||||
"#233f2b", # primary
|
"#233f2b", # primary
|
||||||
"#7e9084", # secondary
|
"#7e9084", # secondary
|
||||||
"#d9e1dd", # tertiary
|
"#d9e1dd", # tertiary
|
||||||
@ -272,15 +271,13 @@ plot_submission_trends(survey_data,
|
|||||||
|
|
||||||
## 90 Day Trend
|
## 90 Day Trend
|
||||||
|
|
||||||
The plot below visualizes the survey submission trends for the past 90 days. It shows the number of submissions made each day, highlighting variations over the last three months. This type of plot is helpful for understanding trends in user activity, such as identifying peak submission days, periods of low activity, or gradual changes over time.
|
This chart presents submission trends over the last 90 days. It provides a broader view of participation patterns, helping to identify sustained surges, lulls, or seasonal effects. The dashed line indicates a smoothed average trend over time.
|
||||||
|
|
||||||
The data used for this plot is filtered to include only submissions made in the last 90 days, with the submission count for each date represented by both the line and the points on the graph. A smoothed trend line (dashed) has been added to help visualize the overall submission pattern over this period.
|
|
||||||
|
|
||||||
```{r plot-submission-trends-90d, echo=TRUE, message=FALSE, fig.height=6, fig.width=8}
|
```{r plot-submission-trends-90d, echo=TRUE, message=FALSE, fig.height=6, fig.width=8}
|
||||||
survey_data$CreationDate <- as.Date(survey_data$CreationDate)
|
survey_data$CreationDate <- as.Date(survey_data$CreationDate)
|
||||||
plot_submission_trends(survey_data,
|
plot_submission_trends(survey_data,
|
||||||
days_ago = 90,
|
days_ago = 90,
|
||||||
color_palette <- c(
|
color_palette = c(
|
||||||
"#233f2b", # primary
|
"#233f2b", # primary
|
||||||
"#7e9084", # secondary
|
"#7e9084", # secondary
|
||||||
"#d9e1dd", # tertiary
|
"#d9e1dd", # tertiary
|
||||||
@ -290,6 +287,8 @@ plot_submission_trends(survey_data,
|
|||||||
|
|
||||||
## Optional Question Response Rates
|
## Optional Question Response Rates
|
||||||
|
|
||||||
|
The table below summarizes response rates for selected optional questions from the survey. For each field, the response rate represents the percentage of participants who provided a valid response. For most fields, any non-missing value is considered a response. However, for **"Total Number of Species Planted"**, only values greater than zero are treated as valid responses.
|
||||||
|
|
||||||
```{r func-calculate_response_rates, echo=TRUE, message=FALSE}
|
```{r func-calculate_response_rates, echo=TRUE, message=FALSE}
|
||||||
# Function to calculate response rates for selected fields
|
# Function to calculate response rates for selected fields
|
||||||
calculate_response_rates <- function(survey_data, fields, caption) {
|
calculate_response_rates <- function(survey_data, fields, caption) {
|
||||||
@ -329,10 +328,6 @@ calculate_response_rates <- function(survey_data, fields, caption) {
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
The table below summarizes the response rates for optional top-level questions in the survey. These are the questions that all participants are asked, with some triggering additional follow-up questions based on responses. The response rate is the percentage of participants who provided an answer for each question.
|
|
||||||
|
|
||||||
The "Total Number of Species Planted" question has special handling—only responses greater than 0 are considered valid, whereas for other questions, any non-NA value counts as a response.
|
|
||||||
|
|
||||||
```{r response-rate-table-optional, echo=TRUE, message=FALSE, fig.height=6, fig.width=8}
|
```{r response-rate-table-optional, echo=TRUE, message=FALSE, fig.height=6, fig.width=8}
|
||||||
fields <- c("Planter Contact Email (Optional)", "Funding Source (Optional)", "Land Ownership (Optional)",
|
fields <- c("Planter Contact Email (Optional)", "Funding Source (Optional)", "Land Ownership (Optional)",
|
||||||
"Tree Size Planted (Optional)", "Source of Trees (Optional)", "Total Number of Species Planted")
|
"Tree Size Planted (Optional)", "Source of Trees (Optional)", "Total Number of Species Planted")
|
||||||
@ -341,12 +336,12 @@ calculate_response_rates(survey_data, fields, "Response Rates for Key Survey Que
|
|||||||
|
|
||||||
The following provides additional context for each survey question/field, detailing what the percentage represents.
|
The following provides additional context for each survey question/field, detailing what the percentage represents.
|
||||||
|
|
||||||
- **Planter Contact Email**: The percentage of respondents who provided their email address.
|
* **Planter Contact Email**: % of respondents who provided an email address.
|
||||||
- **Funding Source**: The percentage of respondents who identified their funding source.
|
* **Funding Source**: % who reported how their tree planting was funded.
|
||||||
- **Land Ownership**: The percentage of respondents who indicated their land ownership status.
|
* **Land Ownership**: % who identified whether the land is publicly or privately owned.
|
||||||
- **Tree Size Planted**: The percentage of respondents who specified the size of trees they planted.
|
* **Tree Size Planted**: % who specified the size category of planted trees.
|
||||||
- **Source of Trees**: The percentage of respondents who reported the source of the trees they planted.
|
* **Source of Trees**: % who indicated where the trees were sourced.
|
||||||
- **Total Number of Species Planted**: The percentage of respondents who provided the species of tree(s) they planted.
|
* **Total Number of Species Planted**: % who listed at least one species (excluding blanks or 0s).
|
||||||
|
|
||||||
# Participant Analysis {.tabset}
|
# Participant Analysis {.tabset}
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user