knitr::opts_chunk$set(warning = FALSE, message = FALSE)
# remove everything and restart
rm(list = ls())
library(ggplot2)
library(sf)
library(dplyr)
library(tidyr)
library(viridis)
library(readr)
library(ggsignif)
formula <- y ~ x # needed for ggpmisc's equation and R2 text
# Define the Albers Equal Area Conic CRS appropriate for CONUS
albers_crs <- st_crs("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs")
indir <- "H:/My Drive/Projects/Beavers/Beaver_EarthEngine/BeaverPonds"
outdir <- "H:/My Drive/Projects/Beavers/Beaver_EarthEngine/BeaverPonds/OutFigs"
outfig <- "H:/My Drive/Manuscript/BeavePond_westernUS/Figs"
1. load data
load(paste0(outdir,"/pond_polygon_eco_cluster.RData"))
pond_polygon_eco_cluster_df <- st_drop_geometry(pond_polygon_eco_cluster)
2. get ponds data in ecoregions
names(pond_polygon_eco_cluster_df)
## [1] "CustomID" "count" "pondAream2" "DamID"
## [5] "DamLenM" "FolderPath" "buffer" "pondsAream2"
## [9] "Name" "US_L4CODE" "US_L4NAME" "US_L3CODE"
## [13] "US_L3NAME" "NA_L3CODE" "NA_L3NAME" "NA_L2CODE"
## [17] "NA_L2NAME" "NA_L1CODE" "NA_L1NAME" "L4_KEY"
## [21] "L3_KEY" "L2_KEY" "L1_KEY" "ClusterID"
## [25] "clusterAream2" "EEAGER_"
pond_eco <- pond_polygon_eco_cluster_df[,c("FolderPath","Name","NA_L3CODE","NA_L3NAME")]
pond_eco_unique <- unique(pond_eco)
level_dam <- pond_polygon_eco_cluster_df[,c("DamID","pondsAream2","NA_L3CODE","NA_L3NAME")]
level_dam_unique <- unique(level_dam)
3. plot
# Update ecoregion labels to include both name and code
ecoregion_labels <- c(
"10.1.4" = "Wyoming Basin (10.1.4)",
"9.3.1" = "Northwestern Glaciated Plains (9.3.1)",
"6.2.14" = "Southern Rockies (6.2.14)",
"6.2.4" = "Canadian Rockies (6.2.4)",
"6.2.8" = "Eastern Cascades Slopes and Foothills (6.2.8)"
)
# Add updated Ecoregion column to level_dam_unique
level_dam_unique$Ecoregion <- factor(
level_dam_unique$NA_L3CODE,
levels = names(ecoregion_labels),
labels = ecoregion_labels
)
# Precompute group statistics including Ecoregion
group_stats <- level_dam_unique %>%
group_by(NA_L3CODE, Ecoregion) %>%
summarise(
Nb = n(),
Mean = mean(log10(pondsAream2)),
Median = median(log10(pondsAream2)),
Max = max(log10(pondsAream2)) # Get the max value for positioning
)
# Create the plot with updated legend and no violin outline
# Updated legend positioning and style
ggplot(level_dam_unique, aes(x = NA_L3CODE, y = log10(pondsAream2), fill = Ecoregion)) +
geom_violin() + # Remove outline with `color = NA`
geom_boxplot(width = 0.06, outlier.shape = NA, fill = "white") +
labs(
title = "Distribution of beaver pond area across level III ecoregions",
# x = "Ecoregion",
y = "Log10 of pond area (m²)"
) +
theme_bw(base_size = 12) +
theme(
plot.title = element_text(hjust = 0.5),
axis.title.y = element_text(size = 10),
axis.title.x = element_blank(),
legend.title = element_blank(), # Remove legend title
legend.text = element_text(size = 10),
legend.position = "bottom", # Position legend below the plot
legend.box = "horizontal" # Arrange legend horizontally
) +
guides(fill = guide_legend(nrow = 3, byrow = TRUE)) +
scale_fill_manual(
values = c("Wyoming Basin (10.1.4)" = "#e78ac3",
"Northwestern Glaciated Plains (9.3.1)" = "#a6d854",
"Southern Rockies (6.2.14)" = "#fc8d62",
"Canadian Rockies (6.2.4)" = "#8da0cb",
"Eastern Cascades Slopes and Foothills (6.2.8)" = "#66c2a5"),
breaks = c(
"Canadian Rockies (6.2.4)",
"Eastern Cascades Slopes and Foothills (6.2.8)",
"Southern Rockies (6.2.14)",
"Northwestern Glaciated Plains (9.3.1)",
"Wyoming Basin (10.1.4)"
)
) +
geom_text(
data = group_stats,
aes(x = NA_L3CODE, y = Max + 0.2, label = paste("n =", Nb)),
color = "black", vjust = 0, size = 4
) +
geom_signif(
comparisons = list(c("9.3.1", "6.2.14")),
map_signif_level = TRUE,
y_position = max(log10(level_dam_unique$pondsAream2)) * 1.05,
tip_length = 0.02,
textsize = 4
) +
ylim(NA, max(log10(level_dam_unique$pondsAream2)) * 1.2)

4. save plot
# fname <- paste0(outfig,'/ecoregion_level_III.png')
# ggsave(filename = fname, plot = last_plot(), dpi = 600, width = 6.5, units = 'in')