Main Figures
Have a legend for all plots
Since there’s a set parameter in set_colors.R
file that was sourced at the beginning, we can use this throughout to can assure that the below legends will represent the legends called afterwards.
####### Legend for season
legend_plot <- ggplot(metadata, aes(y = frac_bacprod, x = fraction, fill = fraction, shape = season)) +
geom_jitter(size = 3, width = 0.2) +
scale_fill_manual(values = fraction_colors, guide = guide_legend(override.aes = list(shape = 22, size = 4))) +
scale_shape_manual(values = season_shapes, guide = guide_legend(override.aes = list(size = 4))) +
theme(legend.position = "bottom", axis.title.x = element_blank(),
legend.title = element_blank(), legend.justification="center",
plot.margin = unit(c(0.2,0.2,1,0.2), "cm")) # top, right, bottom, and left margins
# Extract the legend
season_legend <- cowplot::get_legend(legend_plot + theme(legend.direction = "horizontal",legend.justification="center" ,legend.box.just = "bottom"))
### Make it 2 rows for plots that are skinnier
####### Legend for season
season_2row_plot <- ggplot(metadata, aes(y = frac_bacprod, x = fraction, fill = fraction, shape = season)) +
geom_jitter(size = 3, width = 0.2) +
scale_fill_manual(values = fraction_colors, guide = guide_legend(override.aes = list(shape = 22, size = 4))) +
scale_shape_manual(values = season_shapes, guide = guide_legend(override.aes = list(size = 4))) +
theme(legend.position = "bottom", legend.justification="center",
axis.title.x = element_blank(),
legend.title = element_blank(), legend.box = "vertical",
legend.spacing.y = unit(0.1, 'cm'),
plot.margin = unit(c(0.2,0.2,1,0.2), "cm")) # top, right, bottom, and left margins
# Extract the legend
season_2row_legend <- cowplot::get_legend(season_2row_plot + theme(legend.direction = "horizontal",legend.justification="center" ,legend.box.just = "bottom"))
####### Legend for lakesite
lakesite_legend <- ggplot(metadata, aes(y = frac_bacprod, x = fraction, fill = fraction, shape = lakesite)) +
geom_jitter(size = 3, width = 0.2) +
scale_fill_manual(values = fraction_colors, guide = guide_legend(override.aes = list(shape = 22, size = 4))) +
scale_shape_manual(values = lakesite_shapes, guide = guide_legend(override.aes = list(size = 4))) +
theme(legend.position = "bottom", axis.title.x = element_blank(),
legend.title = element_blank(), legend.justification="center",
plot.margin = unit(c(0.2,0.2,1,0.2), "cm")) # top, right, bottom, and left margins
# Extract the legend
lakesite_legend <- cowplot::get_legend(lakesite_legend + theme(legend.direction = "horizontal",legend.justification="center" ,legend.box.just = "bottom"))
Figure 1
######################################################### Fraction ABUNDANCe
frac_abund_wilcox <- wilcox.test(log10(as.numeric(fraction_bac_abund)) ~ fraction, data = metadata)
frac_abund_wilcox
##
## Wilcoxon rank sum test
##
## data: log10(as.numeric(fraction_bac_abund)) by fraction
## W = 0, p-value = 1.479e-06
## alternative hypothesis: true location shift is not equal to 0
metadata %>%
group_by(fraction) %>%
summarize(mean(as.numeric(fraction_bac_abund)))
## # A tibble: 2 x 2
## fraction `mean(as.numeric(fraction_bac_abund))`
## <fct> <dbl>
## 1 Particle 41169.
## 2 Free 734522.
# Make a data frame to draw significance line in boxplot (visually calculated)
dat1 <- data.frame(a = c(1.1,1.1,1.9,1.9), b = c(6.45,6.5,6.5,6.45)) # WholePart vs WholeFree
poster_a <-
ggplot(filter(metadata, norep_filter_name != "MOTEJ515"),
aes(y = log10(as.numeric(fraction_bac_abund)), x = fraction)) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes( fill = fraction)) +
#ylab("Log10(Bacterial Counts) \n (cells/mL)") +
ylab(expression(atop(log[10]*"(Bacterial Counts)", "(cells/mL)"))) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
##### Particle vs free cell abundances
geom_path(data = dat1, aes(x = a, y = b), linetype = 1, color = "gray40") +
annotate("text", x=1.5, y=6.55, size = 8, color = "gray40", label= paste("***")) +
annotate("text", x=1.5, y=6.4, size = 3.5, color = "gray40",
label= paste("p =", round(frac_abund_wilcox$p.value, digits = 6))) +
theme(legend.position = "bottom",
legend.title = element_blank(),
axis.title.x = element_blank())
######################################################### TOTAL PRODUCTION
totprod_wilcox <- wilcox.test(frac_bacprod ~ fraction, data = metadata)
totprod_wilcox
##
## Wilcoxon rank sum test
##
## data: frac_bacprod by fraction
## W = 33, p-value = 0.02418
## alternative hypothesis: true location shift is not equal to 0
metadata %>%
group_by(fraction) %>%
summarize(mean(frac_bacprod))
## # A tibble: 2 x 2
## fraction `mean(frac_bacprod)`
## <fct> <dbl>
## 1 Particle 9.96
## 2 Free 24.1
# Make a data frame to draw significance line in boxplot (visually calculated)
dat2 <- data.frame(a = c(1.1,1.1,1.9,1.9), b = c(71,72,72,71)) # WholePart vs WholeFree
poster_b <- ggplot(metadata, aes(y = frac_bacprod, x = fraction)) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes( fill = fraction)) +
scale_fill_manual(values = fraction_colors, guide = FALSE) +
scale_shape_manual(values = season_shapes) +
scale_y_continuous(limits = c(0, 78), expand = c(0,0), breaks = seq(0, 75, by = 15)) +
#ylab("Community Production \n (μgC/L/day)") +
ylab(expression(atop("Community Production", "(μgC/L/day)"))) +
##### Particle vs free bulk production
geom_path(data = dat2, aes(x = a, y = b), linetype = 1, color = "gray40") +
annotate("text", x=1.5, y=73, size = 8, color = "gray40", label= paste("*")) +
annotate("text", x=1.5, y=69, size = 3.5, color = "gray40",
label= paste("p =", round(totprod_wilcox$p.value, digits = 3))) +
theme(legend.position = "none",
axis.title.x = element_blank())
######################################################### TOTAL PRODUCTION
percellprod_wilcox <- wilcox.test(log10(fracprod_per_cell) ~ fraction, data = filter(metadata, norep_filter_name != "MOTEJ515"))
percellprod_wilcox
##
## Wilcoxon rank sum test
##
## data: log10(fracprod_per_cell) by fraction
## W = 125, p-value = 6.656e-05
## alternative hypothesis: true location shift is not equal to 0
filter(metadata, norep_filter_name != "MOTEJ515") %>%
group_by(fraction) %>%
summarize(mean(fracprod_per_cell))
## # A tibble: 2 x 2
## fraction `mean(fracprod_per_cell)`
## <fct> <dbl>
## 1 Particle 0.000000482
## 2 Free 0.0000000387
# Make a data frame to draw significance line in boxplot (visually calculated)
dat3 <- data.frame(a = c(1.1,1.1,1.9,1.9), b = c(-5.05,-5,-5,-5.05)) # WholePart vs WholeFree
line_2c <- expression("log" ~~ sqrt(x, y) ~~ "or this" ~~ sum(x[i], i==1, n) ~~ "math expression")
poster_c <- ggplot(filter(metadata, norep_filter_name != "MOTEJ515"),
aes(y = log10(fracprod_per_cell), x = fraction)) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes( fill = fraction)) +
scale_fill_manual(values = fraction_colors, guide = FALSE) +
scale_shape_manual(values = season_shapes) +
ylim(c(-8.5, -4.9)) +
ylab(expression(atop(log[10]*"(Per-Capita Production)", "(μgC/cell/day)"))) +
#ylab("Log10(Per-Capita Production) \n(μgC/cell/day)") +
##### Particle vs free per-cell production
geom_path(data = dat3, aes(x = a, y = b), linetype = 1, color = "gray40") +
annotate("text", x=1.5, y=-4.95, size = 8, color = "gray40", label= paste("***")) +
annotate("text", x=1.5, y=-5.15, size = 3.5, color = "gray40",
label= paste("p =", round(percellprod_wilcox$p.value, digits = 5))) +
theme(legend.position = "none",
axis.title.x = element_blank())
######## FIGURE 1
row1_plots <- plot_grid(poster_a + theme(legend.position = "none"), poster_b, poster_c,
labels = c("A", "B", "C"),
ncol = 3, nrow = 1)
fig_1 <- plot_grid(row1_plots, season_legend,
ncol = 1, nrow = 2,
rel_heights = c(1, 0.08)); fig_1
# What are the averages?
metadata %>%
group_by(fraction) %>%
dplyr::select(frac_bacprod, fracprod_per_cell_noinf) %>%
na.omit() %>%
summarize(avg_comm_prod = mean(frac_bacprod),
avg_percell_prod = mean(fracprod_per_cell_noinf),
log10_avg_percell_prod = log10(avg_percell_prod))
## # A tibble: 2 x 4
## fraction avg_comm_prod avg_percell_prod log10_avg_percell_prod
## <fct> <dbl> <dbl> <dbl>
## 1 Particle 9.83 0.000000482 -6.32
## 2 Free 24.1 0.0000000387 -7.41
Figure 2
site_div_order <- c("richness","shannon","simpson","unweighted_sesmpd",
"phylo_richness", "phylo_shannon","phylo_simpson", "weighted_sesmpd")
site_div_labs <- c("{}^0*italic(D)", "{}^1*italic(D)","{}^2*italic(D)", "italic(Unweighted_MPD)",
"{}^0*italic(PD)", "{}^1*italic(PD)","{}^2*italic(PD)", "italic(Weighted_MPD)")
long_div_meta_df <-
wide_div_meta_df %>%
dplyr::select(c(norep_filter_name, richness:weighted_sesmpd, fraction, lakesite, season)) %>%
gather(key = Diversity, value = div_val, richness:weighted_sesmpd) %>%
mutate(Diversity = fct_relevel(Diversity, levels = site_div_order),
hill_labels = factor(Diversity, labels = site_div_labs))
wc_pafl_pvals1 <- c(wilcox.test(richness ~ fraction, data = wide_div_meta_df)$p.value,
wilcox.test(shannon ~ fraction, data = wide_div_meta_df)$p.value,
wilcox.test(simpson ~ fraction, data = wide_div_meta_df)$p.value,
wilcox.test(unweighted_sesmpd ~ fraction, data = wide_div_meta_df)$p.value,
# Row 2
wilcox.test(phylo_richness ~ fraction, data = wide_div_meta_df)$p.value,
wilcox.test(phylo_shannon ~ fraction, data = wide_div_meta_df)$p.value,
wilcox.test(phylo_simpson ~ fraction, data = wide_div_meta_df)$p.value,
wilcox.test(weighted_sesmpd ~ fraction, data = wide_div_meta_df)$p.value)
pafl_pvals_adjusted <- p.adjust(wc_pafl_pvals1, method = "fdr")
wc_pafl_pvals <- c(paste("p=", round(pafl_pvals_adjusted[1], digits = 3), sep = ""),
paste("p=", round(pafl_pvals_adjusted[2], digits = 3), sep = ""),
paste("N", "S", sep = ""),
paste("p=", round(pafl_pvals_adjusted[4], digits = 2), sep = ""),
# Row 2
paste("p=", round(pafl_pvals_adjusted[5], digits = 3),sep = ""),
paste("p=", round(pafl_pvals_adjusted[6], digits = 3), sep = ""),
paste("p=", round(pafl_pvals_adjusted[7], digits = 3), sep = ""),
paste("N", "S", sep = ""))
fraction <- as.character(rep("Free", 8))
# Maximum Diversity Values by Fraction
wide_div_meta_df %>%
group_by(fraction) %>%
summarize(max_rich = max(richness), max_shan = max(shannon), max_simps = max(simpson), max_umpd = max(unweighted_sesmpd),
# row 2
max_phyrich = max(phylo_richness), max_physhan = max(phylo_shannon), max_physimps = max(phylo_simpson), max_wmpd = max(weighted_sesmpd))
## # A tibble: 2 x 9
## fraction max_rich max_shan max_simps max_umpd max_phyrich max_physhan max_physimps max_wmpd
## <fct> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Particle 1394 307. 80.2 1.49 154. 15.8 5.94 0.141
## 2 Free 767 102. 43.4 1.00 108. 8.44 4.05 -0.000646
# Minimum Diversity Values by Fraction
wide_div_meta_df %>%
group_by(fraction) %>%
summarize(min_rich = min(richness), min_shan = min(shannon), min_simps = min(simpson), max_umpd = min(unweighted_sesmpd),
# row 2
max_phyrich = min(phylo_richness), max_physhan = min(phylo_shannon), max_physimps = min(phylo_simpson), max_wmpd = min(weighted_sesmpd))
## # A tibble: 2 x 9
## fraction min_rich min_shan min_simps max_umpd max_phyrich max_physhan max_physimps max_wmpd
## <fct> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Particle 468 42.2 12.6 -1.36 69.5 5.32 2.48 -0.749
## 2 Free 326 32.8 12.2 -0.0927 50.3 3.91 1.59 -0.770
# Locations of labels
maxes <- c(1350, 299, 78, 1.35, 150, 15.2, 5.8, 0.1)
df_temp <- data.frame(hill_labels = site_div_labs, wc_pafl_pvals, fraction, maxes)
fig2 <- long_div_meta_df %>%
dplyr::filter(Diversity %in% c("richness", "shannon", "simpson", "unweighted_sesmpd")) %>%
ggplot(aes(y = div_val, x = fraction, fill = fraction)) +
ylab("Value") +
facet_wrap(hill_labels~., scales = "free", labeller = label_parsed, nrow = 2, ncol = 4) +
geom_point(size = 3, position = position_jitterdodge(), aes(shape = season)) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, color = "black") +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
scale_color_manual(values = fraction_colors) +
theme(legend.position = "none", axis.title.x = element_blank()) +
geom_text(data = dplyr::filter(df_temp, hill_labels %in% c("{}^0*italic(D)", "{}^1*italic(D)","{}^2*italic(D)", "italic(Unweighted_MPD)")),
aes(x = fraction, y = maxes, label = wc_pafl_pvals), size = 4.5, hjust = 0.5,color = "grey40")
plot_grid(fig2, season_legend, nrow =2, ncol =1, rel_heights = c(1, 0.08))
Linear Models
Prepare data frames for table S1 and S2
## Prepare the dataframes for Free and Particle data
metadata_pca_PD_free <- dplyr::filter(wide_div_meta_df, fraction == "Free")
metadata_pca_PD_part <- dplyr::filter(wide_div_meta_df, fraction == "Particle")
# List of environmental variables to run linear models on
# BGA_cellspermL and Turb_NTU do not have enough data points for regression
lm_variables <- c("frac_bacprod", "fracprod_per_cell_noinf", "Temp_C", "SpCond_uSpercm", "TDS_mgperL",
"pH", "ORP_mV", "Chl_Lab_ugperL", "Cl_mgperL", "SO4_mgperL", "NO3_mgperL",
"NH3_mgperL", "TKN_mgperL", "SRP_ugperL", "TP_ugperL", "Alk_mgperL", "DO_mgperL",
"total_bac_abund","attached_bac", "dnaconcrep1",
"PC1", "PC2",
"richness", "shannon", "simpson", "phylo_richness", "phylo_shannon", "phylo_simpson",
"unweighted_sesmpd", "weighted_sesmpd")
########################################################
############## Particle: Community-wide
lm_summary_particle_comm <- metadata_pca_PD_part %>%
dplyr::select(one_of(lm_variables), -fracprod_per_cell_noinf) %>%
gather(key = independent, value = measurement, -frac_bacprod) %>%
mutate(measurement = as.numeric(measurement)) %>%
group_by(independent) %>%
do(glance(lm(frac_bacprod ~ measurement, data = .))) %>%
mutate(fraction = "Particle", dependent = "Community Production") %>%
dplyr::select(fraction, dependent, independent, logLik, AIC, adj.r.squared, p.value) %>%
ungroup() %>%
mutate(FDR.p = p.adjust(p.value, method = "fdr"))
############## Particle: Per-capita
lm_summary_particle_percap <- metadata_pca_PD_part %>%
dplyr::select(one_of(lm_variables), -frac_bacprod) %>%
filter(!is.na(fracprod_per_cell_noinf)) %>% # Remove samples that are not NA
gather(key = independent, value = measurement, -fracprod_per_cell_noinf) %>%
mutate(measurement = as.numeric(measurement)) %>%
group_by(independent) %>%
do(glance(lm(log10(fracprod_per_cell_noinf) ~ measurement, data = .))) %>%
mutate(fraction = "Particle", dependent = "Per-Capita Production") %>%
dplyr::select(fraction, dependent, independent, logLik, AIC, adj.r.squared, p.value) %>%
ungroup() %>%
mutate(FDR.p = p.adjust(p.value, method = "fdr"))
########################################################
############## Free: Community-wide
lm_summary_free_comm <- metadata_pca_PD_free %>%
dplyr::select(one_of(lm_variables), -fracprod_per_cell_noinf) %>%
gather(key = independent, value = measurement, -frac_bacprod) %>%
mutate(measurement = as.numeric(measurement)) %>%
group_by(independent) %>%
do(glance(lm(frac_bacprod ~ measurement, data = .))) %>%
mutate(fraction = "Free", dependent = "Community Production") %>%
dplyr::select(fraction, dependent, independent, logLik, AIC, adj.r.squared, p.value) %>%
ungroup() %>%
mutate(FDR.p = p.adjust(p.value, method = "fdr"))
############## Free: Per-capita
lm_summary_free_percap <- metadata_pca_PD_free %>%
dplyr::select(one_of(lm_variables), -frac_bacprod) %>%
filter(!is.na(fracprod_per_cell_noinf)) %>% # Remove samples that are not NA
gather(key = independent, value = measurement, -fracprod_per_cell_noinf) %>%
mutate(measurement = as.numeric(measurement)) %>%
group_by(independent) %>%
do(glance(lm(log10(fracprod_per_cell_noinf) ~ measurement, data = .))) %>%
mutate(fraction = "Free", dependent = "Per-Capita Production") %>%
dplyr::select(fraction, dependent, independent, logLik, AIC, adj.r.squared, p.value) %>%
ungroup() %>%
mutate(FDR.p = p.adjust(p.value, method = "fdr"))
## Filtered PCA Linear Model Results
# Put all of the PCA and environmental linear model results together into one dataframe
all_pca_div_environ_lm_results <-
bind_rows(lm_summary_particle_comm, lm_summary_particle_percap,
lm_summary_free_comm, lm_summary_free_percap) %>%
dplyr::rename(independent_var = independent,
dependent_var = dependent)
## Combine the results with the diversity linear models
individual_lm_results <- all_pca_div_environ_lm_results %>%
mutate(AIC = round(AIC, digits = 2),
adj.r.squared = round(adj.r.squared, digits = 2),
p.value = round(p.value, digits = 4),
FDR.p = round(FDR.p, digits = 4)) %>%
rename(FDR.p.value = FDR.p)
# Free-Living samples only
div_measures_free <- comb_div_df %>%
dplyr::select(fraction, Observed, Diversity, frac_bacprod, fracprod_per_cell_noinf) %>%
filter(fraction == "Free") %>%
dplyr::select(-fraction)
# Particle-associated samples only
div_measures_part <- comb_div_df %>%
dplyr::select(fraction, Observed, Diversity, frac_bacprod, fracprod_per_cell_noinf) %>%
filter(fraction == "Particle") %>%
dplyr::select(-fraction)
## All of the samples!
div_measures_all <- comb_div_df %>%
dplyr::select(Observed, Diversity, frac_bacprod, fracprod_per_cell_noinf)
########################################################
############## Particle: Community-wide
lm_divs_particle_comm <- div_measures_part %>%
dplyr::select(-fracprod_per_cell_noinf) %>%
group_by(Diversity) %>%
do(glance(lm(frac_bacprod ~ Observed, data = .))) %>%
mutate(fraction = "Particle", dependent = "Community Production") %>%
dplyr::select(fraction, dependent, Diversity, logLik, AIC, adj.r.squared, p.value) %>%
ungroup() %>%
mutate(FDR.p = p.adjust(p.value, method = "fdr"))
############## Particle: Community-wide
lm_divs_particle_percap <- div_measures_part %>%
dplyr::select(-frac_bacprod) %>%
group_by(Diversity) %>%
do(glance(lm(log10(fracprod_per_cell_noinf) ~ Observed, data = .))) %>%
mutate(fraction = "Particle", dependent = "Per-Capita Production") %>%
dplyr::select(fraction, dependent, Diversity, logLik, AIC, adj.r.squared, p.value) %>%
ungroup() %>%
mutate(FDR.p = p.adjust(p.value, method = "fdr"))
########################################################
############## Free: Community-wide
lm_divs_free_comm <- div_measures_free %>%
dplyr::select(-fracprod_per_cell_noinf) %>%
group_by(Diversity) %>%
do(glance(lm(frac_bacprod ~ Observed, data = .))) %>%
mutate(fraction = "Free", dependent = "Community Production") %>%
dplyr::select(fraction, dependent, Diversity, logLik, AIC, adj.r.squared, p.value) %>%
ungroup() %>%
mutate(FDR.p = p.adjust(p.value, method = "fdr"))
############## Free: Community-wide
lm_divs_free_percap <- div_measures_free %>%
dplyr::select(-frac_bacprod) %>%
group_by(Diversity) %>%
do(glance(lm(log10(fracprod_per_cell_noinf) ~ Observed, data = .))) %>%
mutate(fraction = "Free", dependent = "Per-Capita Production") %>%
dplyr::select(fraction, dependent, Diversity, logLik, AIC, adj.r.squared, p.value) %>%
ungroup() %>%
mutate(FDR.p = p.adjust(p.value, method = "fdr"))
# Community-Wide Production
table_comm_wide <- bind_rows(lm_divs_particle_comm, lm_divs_free_comm) %>%
dplyr::select(-dependent) %>%
mutate(AIC = round(AIC, digits = 1),
adj.r.squared = round(adj.r.squared, digits = 3),
p.value = round(p.value, digits = 3),
FDR.p = round(FDR.p, digits = 3))
datatable(table_comm_wide,
caption = "Ordinary least squares regression statistics for community-wide heterotrophic production",
options = list(pageLength = 12), rownames = FALSE)
# Log10 Per-capita Production
table_percap <- bind_rows(lm_divs_particle_percap, lm_divs_free_percap) %>%
dplyr::select(-dependent) %>%
mutate(AIC = round(AIC, digits = 1),
adj.r.squared = round(adj.r.squared, digits = 3),
p.value = round(p.value, digits = 3),
FDR.p = round(FDR.p, digits = 3))
datatable(table_percap,
caption = "Ordinary least squares regression statistics for log10(per-capita production)",
options = list(pageLength = 12), rownames = FALSE)
# Here I will calculate the lms so later we can simply use these objects for plotting.
######################################################### COMMUNITY WIDE PRODUCTION VS DIVERSITY
######################################################### COMMUNITY WIDE PRODUCTION VS DIVERSITY
# linear model for community production vs species richness
lm_prod_vs_rich_PA <- lm(frac_bacprod ~ richness, data = filter(wide_div_meta_df, fraction == "Particle"))
lm_prod_vs_rich_FL <- lm(frac_bacprod ~ richness, data = filter(wide_div_meta_df, fraction == "Free"))
# linear model for community production vs exp(H')
lm_prod_vs_shannon_PA <- lm(frac_bacprod ~ shannon, data = filter(wide_div_meta_df, fraction == "Particle"))
lm_prod_vs_shannon_FL <- lm(frac_bacprod ~ shannon, data = filter(wide_div_meta_df, fraction == "Free"))
# linear model for community production vs inverse simpson
lm_prod_vs_invsimps_PA <- lm(frac_bacprod ~ simpson, data = filter(wide_div_meta_df, fraction == "Particle"))
lm_prod_vs_invsimps_FL <- lm(frac_bacprod ~ simpson, data = filter(wide_div_meta_df, fraction == "Free"))
######################## PHYLOGENETIC HILL DIVERSITY METRICS ################################
# 0D: linear model for community production vs phylogenetic richness
lm_prod_vs_phylorich_PA <- lm(frac_bacprod ~ phylo_richness, data = filter(wide_div_meta_df, fraction == "Particle"))
lm_prod_vs_phylorich_FL <- lm(frac_bacprod ~ phylo_richness, data = filter(wide_div_meta_df, fraction == "Free"))
# 1D: linear model for community production vs phylogenetic exp(H')
lm_prod_vs_phyloshan_PA <- lm(frac_bacprod ~ phylo_shannon, data = filter(wide_div_meta_df, fraction == "Particle"))
lm_prod_vs_phyloshan_FL <- lm(frac_bacprod ~ phylo_shannon, data = filter(wide_div_meta_df, fraction == "Free"))
# 2D: linear model for community production vs phylogenetic inverse simpson
lm_prod_vs_phylosimps_PA <- lm(frac_bacprod ~ phylo_simpson, data = filter(wide_div_meta_df, fraction == "Particle"))
lm_prod_vs_phylosimps_FL <- lm(frac_bacprod ~ phylo_simpson, data = filter(wide_div_meta_df, fraction == "Free"))
######################################################### PER CAPITA PRODUCTION VS DIVERSITY
######################################################### PER CAPITA PRODUCTION VS DIVERSITY
################
# linear model for community production vs species richness
lm_percell_prod_vs_rich_PA <- lm(log10(fracprod_per_cell_noinf) ~ richness,
data = filter(wide_div_meta_df, fraction == "Particle"));
lm_percell_prod_vs_rich_FL <- lm(log10(fracprod_per_cell_noinf) ~ richness,
data = filter(wide_div_meta_df, fraction == "Free"));
# linear model for community production vs exp(H')
lm_percell_prod_vs_shannon_PA <- lm(log10(fracprod_per_cell_noinf) ~ shannon,
data = filter(wide_div_meta_df, fraction == "Particle"))
lm_percell_prod_vs_shannon_FL <- lm(log10(fracprod_per_cell_noinf) ~ shannon,
data = filter(wide_div_meta_df, fraction == "Free"))
# linear model for community production vs inverse simpson
lm_percell_prod_vs_invsimps_PA <- lm(log10(fracprod_per_cell_noinf) ~ simpson,
data = filter(wide_div_meta_df, fraction == "Particle"))
lm_percell_prod_vs_invsimps_FL <- lm(log10(fracprod_per_cell_noinf) ~ simpson,
data = filter(wide_div_meta_df, fraction == "Free"))
######################## PHYLOGENETIC HILL DIVERSITY METRICS ################################
# linear model for community production vs phylogenetic richness
lm_percell_prod_vs_phylorich_PA <- lm(log10(fracprod_per_cell_noinf) ~ phylo_richness,
data = filter(wide_div_meta_df, fraction == "Particle"));
lm_percell_prod_vs_phylorich_FL <- lm(log10(fracprod_per_cell_noinf) ~ phylo_richness,
data = filter(wide_div_meta_df, fraction == "Free"));
# linear model for community production vs phylogenetic exp(H')
lm_percell_prod_vs_phyloshan_PA <- lm(log10(fracprod_per_cell_noinf) ~ phylo_shannon,
data = filter(wide_div_meta_df, fraction == "Particle"))
lm_percell_prod_vs_phyloshan_FL <- lm(log10(fracprod_per_cell_noinf) ~ phylo_shannon,
data = filter(wide_div_meta_df, fraction == "Free"))
# linear model for community production vs inverse simpson
lm_percell_prod_vs_phylosimps_PA <- lm(log10(fracprod_per_cell_noinf) ~ phylo_simpson,
data = filter(wide_div_meta_df, fraction == "Particle"))
lm_percell_prod_vs_phylosimps_FL <- lm(log10(fracprod_per_cell_noinf) ~ phylo_simpson,
data = filter(wide_div_meta_df, fraction == "Free"))
Figure 3
Prepare Figure 3
# FDR Correction
fig3_pvals <- c(summary(lm_prod_vs_rich_PA)$coefficients[,4][2], summary(lm_percell_prod_vs_rich_PA)$coefficients[,4][2],
summary(lm_prod_vs_invsimps_PA)$coefficients[,4][2],summary(lm_percell_prod_vs_invsimps_PA)$coefficients[,4][2]);
fig3_adjusted_pvals <- p.adjust(fig3_pvals, method = "fdr")
######################################################### OBSERVED RICHNESS #########################################################
rich_fraction_wilcox <- wilcox.test(richness ~ fraction, data = wide_div_meta_df)
rich_fraction_wilcox
##
## Wilcoxon rank sum test
##
## data: richness by fraction
## W = 125, p-value = 0.001433
## alternative hypothesis: true location shift is not equal to 0
wide_div_meta_df %>%
group_by(fraction) %>%
summarize(mean(richness), sd(richness), min(richness), max(richness))
## # A tibble: 2 x 5
## fraction `mean(richness)` `sd(richness)` `min(richness)` `max(richness)`
## <fct> <dbl> <dbl> <int> <int>
## 1 Particle 799. 257. 468 1394
## 2 Free 524. 138. 326 767
# Make a data frame to draw significance line in boxplot (visually calculated)
rich_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(1350, 1400, 1400, 1350)) # WholePart vs WholeFree
rich_distribution_plot <-
wide_div_meta_df %>%
ggplot(aes(y = richness, x = fraction)) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(150,1500), breaks = seq(from = 0, to =1500, by = 300)) +
labs(y = expression({}^0*italic(D)),
x = "Fraction") +
scale_shape_manual(values = season_shapes) +
geom_path(data = rich_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=1500, size = 8, color = "#424645", label= paste("***"), angle = 90) +
annotate("text", x=1.5, y=1150, size = 4, color = "#424645",
label= paste("p =", round(rich_fraction_wilcox$p.value, digits = 3))) +
theme(legend.position = "none",# axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
################ Richness vs Community-wide (Per-Liter) Production
## 1. Extract the R2 and p-value from the linear model:
lm_lab_perliter_rich_PA <- paste("atop(R^2 ==", round(summary(lm_prod_vs_rich_PA)$adj.r.squared, digits = 2), ",",
"p ==", round(unname(fig3_adjusted_pvals[1]), digits = 3), ")")
## 2. Plot Richness vs Community-wide (Per-Liter) Production
prod_vs_rich_plot <-
wide_div_meta_df %>%
# Fetch the standard error from diversity measure to plot error bars
left_join(filter(div_iNEXT, Diversity == "Species richness"), by = "norep_filter_name") %>%
rename(se_iNEXT ="s.e.") %>%
ggplot(aes(x=richness, y=frac_bacprod)) +
geom_errorbarh(aes(xmin = richness - se_iNEXT, xmax = richness + se_iNEXT,
color = fraction), alpha = 0.7) + # X-axis errorbars
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod,
color = fraction)) + # Y-axis errorbars
geom_point(size = 3.5, color = "black", aes(fill = fraction, shape = season)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
labs(x = expression({}^0*italic(D)),
y = expression(atop("Community Production", "(μgC/L/day)"))) +
geom_smooth(data=filter(wide_div_meta_df, fraction == "Particle"),
method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(150,1500), breaks = seq(from = 0, to =1500, by = 300)) +
scale_y_continuous(limits = c(-5, 75), breaks = seq(0, 75, by = 15)) +
# Add the R2 and p-value to the plot
annotate("text", x=1200, y=50, label=lm_lab_perliter_rich_PA, parse = TRUE, color = "#FF6600", size = 4) +
theme(legend.position = "none") # axis.title.x = element_blank(), axis.text.x = element_blank()
# Summary stats
summary(lm(frac_bacprod ~ richness, data = filter(wide_div_meta_df, fraction == "Particle")))
##
## Call:
## lm(formula = frac_bacprod ~ richness, data = filter(wide_div_meta_df,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.5089 -1.9461 -0.8788 4.1222 7.7585
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -10.288864 5.169998 -1.990 0.07461 .
## richness 0.025351 0.006185 4.099 0.00215 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.281 on 10 degrees of freedom
## Multiple R-squared: 0.6268, Adjusted R-squared: 0.5895
## F-statistic: 16.8 on 1 and 10 DF, p-value: 0.00215
# WITHOUT THE TWO HIGH POINTS
summary(lm(frac_bacprod ~ richness, data = filter(wide_div_meta_df, fraction == "Particle" & richness < 1000)))
##
## Call:
## lm(formula = frac_bacprod ~ richness, data = filter(wide_div_meta_df,
## fraction == "Particle" & richness < 1000))
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.152 -3.181 -1.600 3.647 8.554
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.404104 8.621046 0.279 0.787
## richness 0.006798 0.012051 0.564 0.588
##
## Residual standard error: 4.844 on 8 degrees of freedom
## Multiple R-squared: 0.03826, Adjusted R-squared: -0.08196
## F-statistic: 0.3182 on 1 and 8 DF, p-value: 0.5881
################ Richness vs Per Capita (Per-Cell) Production
## 1. Extract the R2 and p-value from the linear model:
lm_lab_percell_rich_PA <- paste("atop(R^2 ==", round(summary(lm_percell_prod_vs_rich_PA)$adj.r.squared, digits = 2), ",",
"p ==", round(unname(fig3_adjusted_pvals[2]), digits = 3), ")")
## 2. Plot Richness vs Per Capita (Per-Cell) Production
percell_prod_vs_rich_plot <-
wide_div_meta_df %>%
# Fetch the standard error from diversity measure to plot error bars
left_join(filter(div_iNEXT, Diversity == "Species richness"), by = "norep_filter_name") %>%
rename(se_iNEXT ="s.e.") %>%
ggplot(aes(x=richness, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = richness - se_iNEXT, xmax = richness + se_iNEXT,
color = fraction), alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, color = "black", aes(fill = fraction, shape = season)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
labs(x = expression({}^0*italic(D)),
y = expression(atop(log[10]*"(Per-Capita Production)", "(μgC/cell/day)"))) +
geom_smooth(data=filter(wide_div_meta_df, fraction == "Particle"),
method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(150,1500), breaks = seq(from = 0, to =1500, by = 300)) +
scale_y_continuous(limits = c(-8.5,-5), breaks = seq(from = -8, to =-5, by = 1)) +
#scale_y_continuous(limits = c(-8e-7,5e-6), breaks = seq(from = 0, to = 6e-7, by = 3e-7)) +
# Add the R2 and p-value to the plot
annotate("text", x=1200, y=-7.5, label=lm_lab_percell_rich_PA, parse = TRUE, color = "#FF6600", size = 4) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
# Summary stats
summary(lm(log10(fracprod_per_cell_noinf) ~ richness, data = filter(wide_div_meta_df, fraction == "Particle")))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ richness, data = filter(wide_div_meta_df,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.69547 -0.18868 0.00469 0.25891 0.43373
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.9719413 0.3519522 -22.651 3.02e-09 ***
## richness 0.0015438 0.0004157 3.714 0.00481 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3526 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6052, Adjusted R-squared: 0.5613
## F-statistic: 13.79 on 1 and 9 DF, p-value: 0.004814
# WITHOUT THE TWO HIGH POINTS
summary(lm(log10(fracprod_per_cell_noinf) ~ richness, data = filter(wide_div_meta_df, fraction == "Particle" & richness < 1000)))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ richness, data = filter(wide_div_meta_df,
## fraction == "Particle" & richness < 1000))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.32289 -0.25690 -0.00076 0.25072 0.37097
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.911e+00 5.157e-01 -13.401 3.02e-06 ***
## richness -2.318e-05 7.197e-04 -0.032 0.975
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2893 on 7 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.0001482, Adjusted R-squared: -0.1427
## F-statistic: 0.001037 on 1 and 7 DF, p-value: 0.9752
######################################################### 2D = SIMPSON DIVERSITY (INVERSE SIMPSON)
######################################################### 2D = SIMPSON DIVERSITY (INVERSE SIMPSON)
invsimps_fraction_wilcox <- wilcox.test(simpson ~ fraction, data = wide_div_meta_df)
invsimps_fraction_wilcox
##
## Wilcoxon rank sum test
##
## data: simpson by fraction
## W = 80, p-value = 0.6707
## alternative hypothesis: true location shift is not equal to 0
wide_div_meta_df %>%
group_by(fraction) %>%
summarize(mean(simpson), sd(simpson), min(simpson), max(simpson))
## # A tibble: 2 x 5
## fraction `mean(simpson)` `sd(simpson)` `min(simpson)` `max(simpson)`
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 Particle 35.7 24.2 12.6 80.2
## 2 Free 24.2 8.22 12.2 43.4
# Make a data frame to draw significance line in boxplot (visually calculated)
invsimps_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(83,85,85,83)) # WholePart vs WholeFree
invsimps_distribution_plot <-
wide_div_meta_df %>%
ggplot(aes(y = simpson, x = fraction)) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) +
labs(y = expression({}^2*italic(D)),
x = "Fraction") +
geom_path(data = invsimps_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=80, size = 4, color = "#424645", label= "NS") +
theme(legend.position = "none", #axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
################ Inverse Simpson vs Community-wide (Per-Liter) Production
## 1. Extract the R2 and p-value from the linear model:
lm_lab_perliter_invsimps_PA <- paste("atop(R^2 ==", round(summary(lm_prod_vs_invsimps_PA)$adj.r.squared, digits = 2), ",",
"p ==", round(unname(fig3_adjusted_pvals[3]), digits = 3), ")")
## 2. Plot Inverse Simpson vs Community-wide (Per-Liter) Production
prod_vs_invsimps_plot <-
wide_div_meta_df %>%
# Fetch the standard error from diversity measure to plot error bars
left_join(filter(div_iNEXT, Diversity == "Simpson diversity"), by = "norep_filter_name") %>%
rename(se_iNEXT ="s.e.") %>%
ggplot(aes(x=simpson, y=frac_bacprod)) +
geom_errorbarh(aes(xmin = simpson - se_iNEXT, xmax = simpson + se_iNEXT,
color = fraction), alpha = 0.7) + # X-axis errorbars
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) + # Y-axis errorbars
geom_point(size = 3.5, color = "black", aes(fill = fraction, shape = season)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
labs(x = expression({}^2*italic(D)),
y = expression(atop("Community Production", "(μgC/L/day)"))) +
geom_smooth(data=filter(wide_div_meta_df, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) +
scale_y_continuous(limits = c(-5, 75), breaks = seq(0, 75, by = 15)) +
# Add the R2 and p-value to the plot
annotate("text", x=70, y=45, label=lm_lab_perliter_invsimps_PA, parse = TRUE, color = "#FF6600", size = 4) +
theme(legend.position = "none") #axis.title.x = element_blank(), axis.text.x = element_blank()
# Summary stats
summary(lm(frac_bacprod ~ simpson, data = filter(wide_div_meta_df, fraction == "Particle")))
##
## Call:
## lm(formula = frac_bacprod ~ simpson, data = filter(wide_div_meta_df,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.5237 -2.1046 -0.1732 0.8787 7.7518
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.39792 2.41547 -0.165 0.872433
## simpson 0.28978 0.05672 5.109 0.000458 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.55 on 10 degrees of freedom
## Multiple R-squared: 0.723, Adjusted R-squared: 0.6953
## F-statistic: 26.1 on 1 and 10 DF, p-value: 0.000458
# WITHOUT THE TWO HIGH POINTS
summary(lm(frac_bacprod ~ simpson, data = filter(wide_div_meta_df, fraction == "Particle" & simpson < 70)))
##
## Call:
## lm(formula = frac_bacprod ~ simpson, data = filter(wide_div_meta_df,
## fraction == "Particle" & simpson < 70))
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.3147 -1.6362 -0.3721 1.4970 6.5303
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.64917 2.47998 0.665 0.5248
## simpson 0.20339 0.08038 2.530 0.0352 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.681 on 8 degrees of freedom
## Multiple R-squared: 0.4445, Adjusted R-squared: 0.3751
## F-statistic: 6.402 on 1 and 8 DF, p-value: 0.03524
################ Inverse Simpson vs Per Capitra (Per-Cell) Production
## 1. Extract the R2 and p-value from the linear model:
lm_lab_percell_invsimps_PA <- paste("atop(R^2 ==", round(summary(lm_percell_prod_vs_invsimps_PA)$adj.r.squared, digits = 2), ",",
"p ==", round(unname(fig3_adjusted_pvals[4]), digits = 3), ")")
## 2. Plot Inverse Simpson vs Per Capitra (Per-Cell) Production
percell_prod_vs_invsimps_plot <-
wide_div_meta_df %>%
# Fetch the standard error from diversity measure to plot error bars
left_join(filter(div_iNEXT, Diversity == "Simpson diversity"), by = "norep_filter_name") %>%
rename(se_iNEXT ="s.e.") %>%
ggplot(aes(x=simpson, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = simpson - se_iNEXT, xmax = simpson + se_iNEXT,
color = fraction), alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, color = "black", aes(shape = season, fill = fraction)) +
scale_color_manual(values = fraction_colors, guide = TRUE) +
scale_fill_manual(values = fraction_colors, guide = FALSE) +
scale_shape_manual(values = season_shapes) +
labs(x = expression({}^2*italic(D)),
y = expression(atop(log[10]*"(Per-Capita Production)", "(μgC/cell/day)"))) +
geom_smooth(data=filter(wide_div_meta_df, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) +
scale_y_continuous(limits = c(-8.5,-5), breaks = seq(from = -8, to =-5, by = 1)) +
# Add the R2 and p-value to the plot
annotate("text", x=70, y=-7.5, label=lm_lab_percell_invsimps_PA, parse = TRUE, color = "#FF6600", size = 4) +
guides(color = guide_legend(override.aes = list(shape = 15))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
# Summary stats
summary(lm(log10(fracprod_per_cell_noinf) ~ simpson, data = filter(wide_div_meta_df, fraction == "Particle")))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ simpson, data = filter(wide_div_meta_df,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.28621 -0.18272 -0.11287 0.07464 0.56371
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.357307 0.158221 -46.500 4.92e-12 ***
## simpson 0.017852 0.003694 4.833 0.00093 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2959 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.7219, Adjusted R-squared: 0.691
## F-statistic: 23.36 on 1 and 9 DF, p-value: 0.00093
# WITHOUT THE TWO HIGH POINTS
summary(lm(log10(fracprod_per_cell_noinf) ~ simpson, data = filter(wide_div_meta_df, fraction == "Particle" & simpson < 70)))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ simpson, data = filter(wide_div_meta_df,
## fraction == "Particle" & simpson < 70))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.23956 -0.19011 -0.03463 0.07002 0.49075
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.172281 0.164525 -43.59 8.73e-10 ***
## simpson 0.009474 0.005539 1.71 0.131
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2429 on 7 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.2947, Adjusted R-squared: 0.194
## F-statistic: 2.925 on 1 and 7 DF, p-value: 0.1309
Plot figure 3
# All plots together
fig3_top <- plot_grid(prod_vs_rich_plot + theme(plot.margin = unit(c(0.2,0.2,0.2,0.8), "cm")), # t, r, b, l
prod_vs_invsimps_plot,
percell_prod_vs_rich_plot + theme(legend.position = "none"),
percell_prod_vs_invsimps_plot + theme(legend.position = "none"),
labels = c("A", "B", "C", "D"), ncol = 2, nrow = 2,
rel_heights = c(1, 1), rel_widths = c(1,1),
align = "hv")
# add legend
plot_grid(fig3_top, season_legend,
ncol = 1, nrow = 2,
rel_heights = c(1, 0.05))
Graphical Abstract
wide_div_meta_df %>%
# Fetch the standard error from diversity measure to plot error bars
left_join(filter(div_iNEXT, Diversity == "Simpson diversity"), by = "norep_filter_name") %>%
rename(se_iNEXT ="s.e.") %>%
ggplot(aes(x=simpson, y=frac_bacprod)) +
geom_errorbarh(aes(xmin = simpson - se_iNEXT, xmax = simpson + se_iNEXT,
color = fraction), alpha = 0.7) + # X-axis errorbars
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) + # Y-axis errorbars
geom_point(size = 3.5, shape = 22, color = "black", aes(fill = fraction, shape = season)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
labs(x = expression(textstyle("Inverse Simpson or")~{}^2*italic(D)),
y = "Community Production \n (μgC/L/day)") +
geom_smooth(data=filter(wide_div_meta_df, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) +
scale_y_continuous(limits = c(-5, 75), breaks = seq(0, 75, by = 15)) +
# Add the R2 and p-value to the plot
annotate("text", x=70, y=45, label=lm_lab_perliter_invsimps_PA, parse = TRUE, color = "#FF6600", size = 4) +
theme(legend.position = c(0.68, 0.90), legend.title = element_blank()) #axis.title.x = element_blank(), axis.text.x = element_blank()
Figure 4
Phylodiv vs ses.MPD
# Is there a relationship between phylogenetic richness (0D) and the unweighted mean pairwise distance?
# 1. Since it's a general correlation - the linear model will be for
lm_unweightMPD_phylorich <- lm(unweighted_sesmpd ~ phylo_richness, data = wide_div_meta_df)
## 2. Extract the R2 and p-value from the linear model:
lm_lab_unweightMPD_phylorich <- paste("atop(R^2 ==", round(summary(lm_unweightMPD_phylorich)$adj.r.squared, digits = 2), ",",
"p ==", round(unname(summary(lm_unweightMPD_phylorich)$coefficients[,4][2]), digits = 3), ")")
## 3. Plot
phy_MPD_p0 <- wide_div_meta_df %>%
ggplot(aes(x = phylo_richness, y = unweighted_sesmpd, fill = fraction)) +
geom_point(aes(shape = season), size = 3) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
labs(x = expression({}^0*italic(PD)),
y = "Unweighted MPD") +
geom_smooth(method='lm', color = "black", fill = "grey") +
# Add the R2 and p-value to the plot
annotate("text", x=140, y=1, label=lm_lab_unweightMPD_phylorich, parse = TRUE, color = "black", size = 4) +
theme(legend.position = "none")
plot_grid(phy_MPD_p0, season_2row_legend, rel_heights = c(1, 0.1),
nrow = 2, ncol = 1)
Lasso Regressions
Prepare data for lasso
# The lasso regression needs datasets that are wide formatted
lasso_cols_remove <- c("project", "year", "Date", "limnion", "dnaconcrep1", "perc_attached_bacprod",
"SD_perc_attached_bacprod", "SE_total_bac_abund", "SE_perc_attached_abund", "SE_attached_bac",
"tot_bacprod", "SD_tot_bacprod","SD_frac_bacprod", "BGA_cellspermL", "Turb_NTU",
"lakesite", "season", "station")
### PARTICLE DATA = 12 samples
# Note: Even though in the inclusion of MOTEJ515 doesn't have the bacterial counts,
# the results from this lasso regression does not change.
lasso_data_df_particle <- wide_div_meta_df %>%
filter(fraction == "Particle" ) %>%
dplyr::select(-c(fraction, lasso_cols_remove))
lasso_data_df_particle_noprod <- lasso_data_df_particle %>%
dplyr::select(-c(fracprod_per_cell, fracprod_per_cell_noinf, norep_filter_name, norep_water_name))
percell_lasso_data_df_particle_noprod <- lasso_data_df_particle %>%
dplyr::select(-c(fracprod_per_cell, frac_bacprod)) %>%
dplyr::filter(norep_filter_name != "MOTEJ515") %>% # Remove the missing row of data :( )
dplyr::select(-c(norep_filter_name, norep_water_name))
### FREE DATA
# Note: Even though in the inclusion of MOTEJ515 doesn't have the bacterial counts,
# the results from this lasso regression does not change.
lasso_data_df_free <- wide_div_meta_df %>%
filter(fraction == "Free" ) %>%
dplyr::select(-c(fraction, lasso_cols_remove))
lasso_data_df_free_noprod <- lasso_data_df_free %>%
dplyr::select(-c(fracprod_per_cell, fracprod_per_cell_noinf, norep_filter_name, norep_water_name))
percell_lasso_data_df_free_noprod <- lasso_data_df_free %>%
dplyr::select(-c(fracprod_per_cell, frac_bacprod)) %>%
dplyr::filter(norep_filter_name != "MOTEJ515") %>% # Remove the missing row of data :( )
dplyr::select(-c(norep_filter_name, norep_water_name))
####Run lasso regression
Lasso - Community Production: Particle
# Set the seed for reproducibility of the grid values
set.seed(777)
################ PREPARE DATA ################
# Subset data needed and scale all o
scaled_comm_data <-
lasso_data_df_particle_noprod %>% # Use data only for the particle samples
scale() %>% # Scale all of the variables to have mean =0 and sd = 1
as.data.frame() # Make it a dataframe so that model.matrix function works (does not take a matrix)
# Set model parameters for community level data
## NOTE: there cannot be any data with NA
x = model.matrix(frac_bacprod ~ ., scaled_comm_data)[,-1]
y = scaled_comm_data$frac_bacprod
grid = 10^seq(10,-2,length = 100)
################ PREPARE TRAINING & TESTING DATA FOR CROSS VALIDATION ################
# Pull out test and training sets for cross validation
# We will use half the set to train the model and the 2nd half of the dataset to test the model
train <- sample(1:nrow(x), nrow(x)/2)
test <- -train
y_test <- y[test]
################ LASSO ################
# Run lasso regression with alpha = 1
lasso_divs_train <- glmnet(x[train,], y[train], alpha = 1, lambda = grid, standardize = FALSE)
par(mfrow = c(1,2))
plot(lasso_divs_train)
# Cross validation
cv_lasso_divs <- cv.glmnet(x[train,], y[train], alpha = 1)
plot(cv_lasso_divs)
best_lasso_lambda <- cv_lasso_divs$lambda.min # Minimum lambda
# https://stats.stackexchange.com/questions/138569/why-is-lambda-plus-1-standard-error-a-recommended-value-for-lambda-in-an-elastic
lasso_lambda_1se <- cv_lasso_divs$lambda.1se # simplest model whose accuracy is comparable with the best model
best_lasso_lambda == lasso_lambda_1se
## [1] TRUE
lasso_divs_pred <- predict(lasso_divs_train, s = best_lasso_lambda, newx = x[test,])
mean((lasso_divs_pred - y_test)^2) # Test
## [1] 1.459684
## Run lasso on the entire dataset with the best lambda value
lasso_divs <- glmnet(x, y, alpha = 1, lambda = best_lasso_lambda, standardize = FALSE)
# What are the lasso coefficients? (Anything with a . is not selected by the model)
predict(lasso_divs, type = "coefficients", s = best_lasso_lambda)
## 32 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) -5.001679e-17
## richness 2.878328e-02
## shannon .
## simpson 3.049965e-01
## phylo_shannon .
## phylo_simpson .
## phylo_richness .
## unweighted_sesmpd .
## weighted_sesmpd .
## Sample_depth_m .
## Temp_C .
## SpCond_uSpercm .
## TDS_mgperL .
## pH .
## ORP_mV .
## Chl_Lab_ugperL .
## Cl_mgperL .
## SO4_mgperL .
## NO3_mgperL .
## NH3_mgperL .
## TKN_mgperL .
## SRP_ugperL .
## TP_ugperL .
## Alk_mgperL .
## DO_mgperL .
## DO_percent .
## total_bac_abund .
## attached_bac .
## perc_attached_abund .
## fraction_bac_abund .
## PC1 .
## PC2 .
## Now, run the most simple, parsimonious lasso model within 1 standard error
## IMPORTANT NOTE: This is the lasso reported within the manuscript
lasso_divs_1se <- glmnet(x, y, alpha = 1, lambda = lasso_lambda_1se, standardize = FALSE)
# What are the lasso coefficients? (Anything with a . is not selected by the model)
predict(lasso_divs_1se, type = "coefficients", s = lasso_lambda_1se)
## 32 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) -5.001679e-17
## richness 2.878328e-02
## shannon .
## simpson 3.049965e-01
## phylo_shannon .
## phylo_simpson .
## phylo_richness .
## unweighted_sesmpd .
## weighted_sesmpd .
## Sample_depth_m .
## Temp_C .
## SpCond_uSpercm .
## TDS_mgperL .
## pH .
## ORP_mV .
## Chl_Lab_ugperL .
## Cl_mgperL .
## SO4_mgperL .
## NO3_mgperL .
## NH3_mgperL .
## TKN_mgperL .
## SRP_ugperL .
## TP_ugperL .
## Alk_mgperL .
## DO_mgperL .
## DO_percent .
## total_bac_abund .
## attached_bac .
## perc_attached_abund .
## fraction_bac_abund .
## PC1 .
## PC2 .
Lasso - Community Production: Free
# Set the seed for reproducibility of the grid values
set.seed(777)
################ PREPARE DATA ################
# Subset data needed and scale all o
scaled_comm_data_free <-
lasso_data_df_free_noprod %>% # Use data only for the free-living samples
scale() %>% # Scale all of the variables to have mean = 0 and sd = 1
as.data.frame() # Make it a dataframe so that model.matrix function works (does not take a matrix)
# Set model parameters for community level data
## NOTE: there cannot be any data with NA
free_x <- model.matrix(frac_bacprod ~ ., scaled_comm_data_free)[,-1]
free_y <- scaled_comm_data_free$frac_bacprod
free_grid <- 10^seq(10,-2,length = 100)
################ PREPARE TRAINING & TESTING DATA FOR CROSS VALIDATION ################
# Pull out test and training sets for cross validation
# We will use half the set to train the model and the 2nd half of the dataset to test the model
free_train <- sample(1:nrow(free_x), nrow(free_x)/2)
free_test <- -free_train
free_y_test <- free_y[free_test]
################ LASSO ################
# Run lasso regression with alpha = 1
lasso_divs_train_free_comm <- glmnet(free_x[free_train,], free_y[free_train], alpha = 1, lambda = free_grid, standardize = FALSE)
par(mfrow = c(1,2))
plot(lasso_divs_train_free_comm)
# Cross validation
free_cv_lasso_divs <- cv.glmnet(free_x[free_train,], free_y[free_train], alpha = 1)
plot(free_cv_lasso_divs)
free_best_lasso_lambda <- free_cv_lasso_divs$lambda.min
# https://stats.stackexchange.com/questions/138569/why-is-lambda-plus-1-standard-error-a-recommended-value-for-lambda-in-an-elastic
free_lasso_lambda_1se <- free_cv_lasso_divs$lambda.1se # simplest model whose accuracy is comparable with the best model
free_best_lasso_lambda == free_lasso_lambda_1se
## [1] FALSE
free_lasso_divs_pred <- predict(lasso_divs_train_free_comm, s = free_best_lasso_lambda, newx = free_x[free_test,])
mean((free_lasso_divs_pred - y_test)^2) # Test
## [1] 1.425752
## Run lasso on the entire dataset with the best lambda value
free_lasso_divs <- glmnet(free_x, free_y, alpha = 1, lambda = free_best_lasso_lambda, standardize = FALSE)
# What are the lasso coefficients? (Anything with a . is not selected by the model)
predict(free_lasso_divs, type = "coefficients", s = free_best_lasso_lambda)
## 32 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) -5.076392e-16
## richness .
## shannon .
## simpson .
## phylo_shannon .
## phylo_simpson .
## phylo_richness .
## unweighted_sesmpd .
## weighted_sesmpd .
## Sample_depth_m .
## Temp_C .
## SpCond_uSpercm .
## TDS_mgperL .
## pH -2.606707e-01
## ORP_mV .
## Chl_Lab_ugperL .
## Cl_mgperL .
## SO4_mgperL .
## NO3_mgperL .
## NH3_mgperL .
## TKN_mgperL .
## SRP_ugperL .
## TP_ugperL .
## Alk_mgperL .
## DO_mgperL .
## DO_percent .
## total_bac_abund .
## attached_bac .
## perc_attached_abund .
## fraction_bac_abund .
## PC1 .
## PC2 .
## Now, run the most simple, parsimonious lasso model within 1 standard error
## IMPORTANT NOTE: This is the lasso reported within the manuscript
free_lasso_divs_1se <- glmnet(free_x, free_y, alpha = 1, lambda = free_lasso_lambda_1se, standardize = FALSE)
# What are the lasso coefficients? (Anything with a . is not selected by the model)
predict(free_lasso_divs_1se, type = "coefficients", s = free_lasso_lambda_1se)
## 32 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) -4.127014e-16
## richness .
## shannon .
## simpson .
## phylo_shannon .
## phylo_simpson .
## phylo_richness .
## unweighted_sesmpd .
## weighted_sesmpd .
## Sample_depth_m .
## Temp_C .
## SpCond_uSpercm .
## TDS_mgperL .
## pH -2.144478e-01
## ORP_mV .
## Chl_Lab_ugperL .
## Cl_mgperL .
## SO4_mgperL .
## NO3_mgperL .
## NH3_mgperL .
## TKN_mgperL .
## SRP_ugperL .
## TP_ugperL .
## Alk_mgperL .
## DO_mgperL .
## DO_percent .
## total_bac_abund .
## attached_bac .
## perc_attached_abund .
## fraction_bac_abund .
## PC1 .
## PC2 .
Lasso - Per Capita Production: Particle
# Set the seed for reproducibility of the grid values
set.seed(777)
################ PREPARE DATA ################
# Subset data needed and scale all o
scaled_percapita_pa_data <-
percell_lasso_data_df_particle_noprod %>%
dplyr::filter(!is.na(fracprod_per_cell_noinf)) %>%
mutate(log10_percell = log10(fracprod_per_cell_noinf)) %>%
dplyr::select(-c(fracprod_per_cell_noinf)) %>%
scale() %>%
as.data.frame() # Make it a dataframe so that model.matrix function works (does not take a matrix)
# Set model parameters for community level data
## NOTE: there cannot be any data with NA
percap_pa_x = model.matrix(log10_percell ~ ., scaled_percapita_pa_data)[,-1]
percap_pa_y = scaled_percapita_pa_data$log10_percell
percap_pa_grid = 10^seq(10,-2,length = 100)
################ PREPARE TRAINING & TESTING DATA FOR CROSS VALIDATION ################
# Pull out test and training sets for cross validation
# We will use half the set to train the model and the 2nd half of the dataset to test the model
percap_pa_train <- sample(1:nrow(percap_pa_x), nrow(percap_pa_x)/2)
percap_pa_test <- -percap_pa_train
percap_pa_y_test <- percap_pa_y[percap_pa_test]
################ LASSO ################
# Run lasso regression with alpha = 1
percap_pa_lasso_divs_train <- glmnet(percap_pa_x[percap_pa_train,], percap_pa_y[percap_pa_train], alpha = 1, lambda = percap_pa_grid, standardize = FALSE)
par(mfrow = c(1,2))
plot(percap_pa_lasso_divs_train)
# Cross validation
percap_pa_cv_lasso_divs <- cv.glmnet(percap_pa_x[percap_pa_train,], percap_pa_y[percap_pa_train], alpha = 1)
plot(percap_pa_cv_lasso_divs)
percap_pa_best_lasso_lambda <- percap_pa_cv_lasso_divs$lambda.min # Minimum lambda
# https://stats.stackexchange.com/questions/138569/why-is-lambda-plus-1-standard-error-a-recommended-value-for-lambda-in-an-elastic
percap_pa_lasso_lambda_1se <- percap_pa_cv_lasso_divs$lambda.1se # simplest model whose accuracy is comparable with the best model
percap_pa_best_lasso_lambda == percap_pa_lasso_lambda_1se
## [1] FALSE
percap_pa_lasso_divs_pred <- predict(percap_pa_lasso_divs_train, s = percap_pa_best_lasso_lambda, newx = percap_pa_x[percap_pa_test,])
mean((percap_pa_lasso_divs_pred - percap_pa_y_test)^2) # Test
## [1] 1.115762
## Run lasso on the entire dataset with the best lambda value
percap_pa_lasso_divs <- glmnet(percap_pa_x, percap_pa_y, alpha = 1, lambda = percap_pa_best_lasso_lambda, standardize = FALSE)
# What are the lasso coefficients? (Anything with a . is not selected by the model)
predict(percap_pa_lasso_divs, type = "coefficients", s = percap_pa_best_lasso_lambda)
## 32 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) -6.639099e-16
## richness 8.155323e-02
## shannon .
## simpson 3.318283e-01
## phylo_shannon .
## phylo_simpson .
## phylo_richness .
## unweighted_sesmpd .
## weighted_sesmpd .
## Sample_depth_m .
## Temp_C -1.365431e-01
## SpCond_uSpercm .
## TDS_mgperL .
## pH .
## ORP_mV .
## Chl_Lab_ugperL .
## Cl_mgperL .
## SO4_mgperL .
## NO3_mgperL .
## NH3_mgperL .
## TKN_mgperL .
## SRP_ugperL .
## TP_ugperL .
## Alk_mgperL .
## DO_mgperL .
## DO_percent .
## total_bac_abund .
## attached_bac .
## perc_attached_abund .
## fraction_bac_abund .
## PC1 .
## PC2 .
## Now, run the most simple, parsimonious lasso model within 1 standard error
## IMPORTANT NOTE: This is the lasso reported within the manuscript
percap_pa_lasso_divs_1se <- glmnet(percap_pa_x, percap_pa_y, alpha = 1, lambda = percap_pa_lasso_lambda_1se, standardize = FALSE)
# What are the lasso coefficients? (Anything with a . is not selected by the model)
predict(percap_pa_lasso_divs_1se, type = "coefficients", s = percap_pa_lasso_lambda_1se)
## 32 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) -6.490174e-16
## richness 3.518435e-02
## shannon .
## simpson 3.244235e-01
## phylo_shannon .
## phylo_simpson .
## phylo_richness .
## unweighted_sesmpd .
## weighted_sesmpd .
## Sample_depth_m .
## Temp_C -8.776686e-02
## SpCond_uSpercm .
## TDS_mgperL .
## pH .
## ORP_mV .
## Chl_Lab_ugperL .
## Cl_mgperL .
## SO4_mgperL .
## NO3_mgperL .
## NH3_mgperL .
## TKN_mgperL .
## SRP_ugperL .
## TP_ugperL .
## Alk_mgperL .
## DO_mgperL .
## DO_percent .
## total_bac_abund .
## attached_bac .
## perc_attached_abund .
## fraction_bac_abund .
## PC1 .
## PC2 .
Lasso - Per Capita Production: Free
# Set the seed for reproducibility of the grid values
set.seed(777)
################ PREPARE DATA ################
# Subset data needed and scale all o
scaled_percapita_fl_data <-
percell_lasso_data_df_free_noprod %>%
dplyr::filter(!is.na(fracprod_per_cell_noinf)) %>%
mutate(log10_percell = log10(fracprod_per_cell_noinf)) %>%
dplyr::select(-c(fracprod_per_cell_noinf)) %>%
scale() %>%
as.data.frame() # Make it a dataframe so that model.matrix function works (does not take a matrix)
# Set model parameters for community level data
## NOTE: there cannot be any data with NA
percap_fl_x = model.matrix(log10_percell ~ ., scaled_percapita_fl_data)[,-1]
percap_fl_y = scaled_percapita_fl_data$log10_percell
percap_fl_grid = 10^seq(10,-2,length = 100)
################ PREPARE TRAINING & TESTING DATA FOR CROSS VALIDATION ################
# Pull out test and training sets for cross validation
# We will use half the set to train the model and the 2nd half of the dataset to test the model
percap_fl_train <- sample(1:nrow(percap_fl_x), nrow(percap_fl_x)/2)
percap_fl_test <- -percap_fl_train
percap_fl_y_test <- percap_fl_y[percap_fl_test]
################ LASSO ################
# Run lasso regression with alpha = 1
percap_fl_lasso_divs_train <- glmnet(percap_fl_x[percap_fl_train,], percap_fl_y[percap_fl_train], alpha = 1, lambda = percap_fl_grid, standardize = FALSE)
par(mfrow = c(1,2))
plot(percap_fl_lasso_divs_train)
# Cross validation
percap_fl_cv_lasso_divs <- cv.glmnet(percap_fl_x[percap_fl_train,], percap_fl_y[percap_fl_train], alpha = 1)
plot(percap_fl_cv_lasso_divs)
percap_fl_best_lasso_lambda <- percap_fl_cv_lasso_divs$lambda.min # Minimum lambda
# https://stats.stackexchange.com/questions/138569/why-is-lambda-plus-1-standard-error-a-recommended-value-for-lambda-in-an-elastic
percap_fl_lasso_lambda_1se <- percap_fl_cv_lasso_divs$lambda.1se # simplest model whose accuracy is comparable with the best model
percap_fl_best_lasso_lambda == percap_fl_lasso_lambda_1se
## [1] TRUE
percap_fl_lasso_divs_pred <- predict(percap_fl_lasso_divs_train, s = percap_fl_best_lasso_lambda, newx = percap_fl_x[percap_fl_test,])
mean((percap_fl_lasso_divs_pred - percap_fl_y_test)^2) # Test
## [1] 1.553166
## Run lasso on the entire dataset with the best lambda value
percap_fl_lasso_divs <- glmnet(percap_fl_x, percap_fl_y, alpha = 1, lambda = percap_fl_best_lasso_lambda, standardize = FALSE)
# What are the lasso coefficients? (Anything with a . is not selected by the model)
predict(percap_fl_lasso_divs, type = "coefficients", s = percap_fl_best_lasso_lambda)
## 32 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) -1.642101e-15
## richness .
## shannon .
## simpson .
## phylo_shannon .
## phylo_simpson .
## phylo_richness .
## unweighted_sesmpd .
## weighted_sesmpd .
## Sample_depth_m .
## Temp_C .
## SpCond_uSpercm .
## TDS_mgperL .
## pH -4.346341e-01
## ORP_mV .
## Chl_Lab_ugperL .
## Cl_mgperL .
## SO4_mgperL .
## NO3_mgperL .
## NH3_mgperL .
## TKN_mgperL .
## SRP_ugperL .
## TP_ugperL .
## Alk_mgperL .
## DO_mgperL .
## DO_percent .
## total_bac_abund .
## attached_bac .
## perc_attached_abund .
## fraction_bac_abund .
## PC1 .
## PC2 .
## Now, run the most simple, parsimonious lasso model within 1 standard error
## IMPORTANT NOTE: This is the lasso reported within the manuscript
percap_fl_lasso_divs_1se <- glmnet(percap_fl_x, percap_fl_y, alpha = 1, lambda = percap_fl_lasso_lambda_1se, standardize = FALSE)
# What are the lasso coefficients? (Anything with a . is not selected by the model)
predict(percap_fl_lasso_divs_1se, type = "coefficients", s = percap_fl_lasso_lambda_1se)
## 32 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) -1.642101e-15
## richness .
## shannon .
## simpson .
## phylo_shannon .
## phylo_simpson .
## phylo_richness .
## unweighted_sesmpd .
## weighted_sesmpd .
## Sample_depth_m .
## Temp_C .
## SpCond_uSpercm .
## TDS_mgperL .
## pH -4.346341e-01
## ORP_mV .
## Chl_Lab_ugperL .
## Cl_mgperL .
## SO4_mgperL .
## NO3_mgperL .
## NH3_mgperL .
## TKN_mgperL .
## SRP_ugperL .
## TP_ugperL .
## Alk_mgperL .
## DO_mgperL .
## DO_percent .
## total_bac_abund .
## attached_bac .
## perc_attached_abund .
## fraction_bac_abund .
## PC1 .
## PC2 .