The goal of this analysis is to check the sensitivity of the biodiversity-ecosystem function relationships to rare OTUs.
We remove OTUs that have X number of sequences throughout the entire dataset and then check the relationship with diversity versus community-wide and per-capita heterotrophic production. This we call “singletons”, “doubletons”, “5-tons”, “10-tons”, etc all the way up to “300-tons”. For context, removing 10-tons will be removing any OTUs that have a count of less than 10 throughout the entire dataset.
library(ggplot2)
library(devtools)
library(phyloseq)
library(kableExtra)
library(tidyr)
library(dplyr)
library(cowplot)
library(forcats)
library(picante) # Will also include ape and vegan
library(car) # For residual analysis
library(sandwich) # for vcovHC function in post-hoc test
library(MASS) # For studres in plot_residuals function
library(boot) # For cross validation
library(DT) # Pretty HTML Table Output
source("../code/Muskegon_functions.R")
source("../code/set_colors.R")
# Set the ggplot theme
theme_set(theme_cowplot())
# Loads a phyloseq object named otu_merged_musk_pruned)
load("../data/otu_merged_musk_pruned.RData")
# The name of the phyloseq object is: otu_merged_musk_pruned
# Productivity measurements are reliable only up to 1 decimal
df1 <- sample_data(otu_merged_musk_pruned) %>%
dplyr::mutate(tot_bacprod = round(tot_bacprod, digits = 1),
SD_tot_bacprod = round(SD_tot_bacprod, digits = 1),
frac_bacprod = round(frac_bacprod, digits = 1),
SD_frac_bacprod = round(SD_frac_bacprod, digits = 1),
fraction_bac_abund = as.numeric(fraction_bac_abund),
fracprod_per_cell = frac_bacprod/(1000*fraction_bac_abund),
fracprod_per_cell_noinf = ifelse(fracprod_per_cell == Inf, NA, fracprod_per_cell)) %>%
dplyr::select(norep_filter_name, lakesite, limnion, fraction, year, season, tot_bacprod, SD_tot_bacprod, frac_bacprod, SD_frac_bacprod, fraction_bac_abund, fracprod_per_cell, fracprod_per_cell_noinf)
row.names(df1) = df1$norep_filter_name
# Add new sample data back into phyloseq object
sample_data(otu_merged_musk_pruned) <- df1
# Remove MOTHJ715 and MBRHP715 because of low sequencing depth
otu_merged_musk_pruned_noMOTHJ715_MBRHP715 <- subset_samples(otu_merged_musk_pruned, norep_filter_name != "MOTHJ715" & norep_filter_name != "MBRHP715")
# Subset only the surface samples for the current study!!
musk_surface <- subset_samples(otu_merged_musk_pruned_noMOTHJ715_MBRHP715,
limnion == "Top" & year == "2015" &
fraction %in% c("WholePart","WholeFree")) # Surface samples, 2015, and WholePart/WholeFree samples only!
musk_surface_pruned <- prune_taxa(taxa_sums(musk_surface) > 0, musk_surface)
# Remove tree
notree_musk_surface_pruned <- phyloseq(tax_table(musk_surface_pruned), otu_table(musk_surface_pruned), sample_data(musk_surface_pruned))
# Remove singletons!
musk_surface_pruned_rm1 <- prune_taxa(taxa_sums(notree_musk_surface_pruned) > 1, notree_musk_surface_pruned)
# Remove the tree for less computationally intensive steps
notree_musk_surface_pruned_rm1 <- phyloseq(tax_table(musk_surface_pruned_rm1), otu_table(musk_surface_pruned_rm1), sample_data(musk_surface_pruned_rm1))
# If taxa with 2 counts are removed
prune_taxa(taxa_sums(notree_musk_surface_pruned_rm1) > 2, notree_musk_surface_pruned_rm1)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 2979 taxa and 24 samples ]
## sample_data() Sample Data: [ 24 samples by 13 sample variables ]
## tax_table() Taxonomy Table: [ 2979 taxa by 8 taxonomic ranks ]
# If taxa with 5 counts are removed
notree_musk_surface_pruned_rm5 <- prune_taxa(taxa_sums(notree_musk_surface_pruned) > 5, notree_musk_surface_pruned)
# If taxa with 10 counts are removed
notree_musk_surface_pruned_rm10 <- prune_taxa(taxa_sums(notree_musk_surface_pruned) > 10, notree_musk_surface_pruned)
# If taxa with 20 counts are removed
notree_musk_surface_pruned_rm20 <- prune_taxa(taxa_sums(notree_musk_surface_pruned) > 20, notree_musk_surface_pruned)
# If taxa with 30 counts are removed
notree_musk_surface_pruned_rm30 <- prune_taxa(taxa_sums(notree_musk_surface_pruned) > 30, notree_musk_surface_pruned)
# If taxa with 60 counts are removed
notree_musk_surface_pruned_rm60 <- prune_taxa(taxa_sums(notree_musk_surface_pruned) > 60, notree_musk_surface_pruned)
# If taxa with 90 counts are removed
notree_musk_surface_pruned_rm90 <- prune_taxa(taxa_sums(notree_musk_surface_pruned) > 90, notree_musk_surface_pruned)
# If taxa with 150 counts are removed
notree_musk_surface_pruned_rm150 <- prune_taxa(taxa_sums(notree_musk_surface_pruned) > 150, notree_musk_surface_pruned)
# If taxa with 300 counts are removed
notree_musk_surface_pruned_rm225 <- prune_taxa(taxa_sums(notree_musk_surface_pruned) > 225, notree_musk_surface_pruned)
# If taxa with 300 counts are removed
notree_musk_surface_pruned_rm300 <- prune_taxa(taxa_sums(notree_musk_surface_pruned) > 300, notree_musk_surface_pruned)
set.seed(777)
################## Remove singltons
alpha_rm1 <- calc_alpha_diversity(physeq = notree_musk_surface_pruned_rm1)
otu_alphadiv_rm1 <- calc_mean_alphadiv(physeq = notree_musk_surface_pruned_rm1,
richness_df = alpha_rm1$Richness,
simpson_df = alpha_rm1$Inverse_Simpson,
exp_shannon_df = alpha_rm1$Shannon) %>%
mutate(fraction = factor(fraction, levels = c("WholePart", "Particle", "WholeFree", "Free")),
lakesite = factor(lakesite, levels = c("MOT", "MDP", "MBR", "MIN")),
measure = factor(measure, levels = c("Richness", "Exponential_Shannon", "Inverse_Simpson")),
Removed = "1-tons")
################## Remove 5-tons
alpha_rm5 <- calc_alpha_diversity(physeq = notree_musk_surface_pruned_rm5)
otu_alphadiv_rm5 <- calc_mean_alphadiv(physeq = notree_musk_surface_pruned_rm5,
richness_df = alpha_rm5$Richness,
simpson_df = alpha_rm5$Inverse_Simpson,
exp_shannon_df = alpha_rm5$Shannon) %>%
mutate(fraction = factor(fraction, levels = c("WholePart", "Particle", "WholeFree", "Free")),
lakesite = factor(lakesite, levels = c("MOT", "MDP", "MBR", "MIN")),
measure = factor(measure, levels = c("Richness", "Exponential_Shannon", "Inverse_Simpson")),
Removed = "5-tons")
################## Remove 10-tons
alpha_rm10 <- calc_alpha_diversity(physeq = notree_musk_surface_pruned_rm10)
otu_alphadiv_rm10 <- calc_mean_alphadiv(physeq = notree_musk_surface_pruned_rm10,
richness_df = alpha_rm10$Richness,
simpson_df = alpha_rm10$Inverse_Simpson,
exp_shannon_df = alpha_rm10$Shannon) %>%
mutate(fraction = factor(fraction, levels = c("WholePart", "Particle", "WholeFree", "Free")),
lakesite = factor(lakesite, levels = c("MOT", "MDP", "MBR", "MIN")),
measure = factor(measure, levels = c("Richness", "Exponential_Shannon", "Inverse_Simpson")),
Removed = "10-tons")
################## Remove 20-tons
alpha_rm20 <- calc_alpha_diversity(physeq = notree_musk_surface_pruned_rm20)
otu_alphadiv_rm20 <- calc_mean_alphadiv(physeq = notree_musk_surface_pruned_rm20,
richness_df = alpha_rm20$Richness,
simpson_df = alpha_rm20$Inverse_Simpson,
exp_shannon_df = alpha_rm20$Shannon) %>%
mutate(fraction = factor(fraction, levels = c("WholePart", "Particle", "WholeFree", "Free")),
lakesite = factor(lakesite, levels = c("MOT", "MDP", "MBR", "MIN")),
measure = factor(measure, levels = c("Richness", "Exponential_Shannon", "Inverse_Simpson")),
Removed = "20-tons")
################## Remove 30-tons
alpha_rm30 <- calc_alpha_diversity(physeq = notree_musk_surface_pruned_rm30)
otu_alphadiv_rm30 <- calc_mean_alphadiv(physeq = notree_musk_surface_pruned_rm30,
richness_df = alpha_rm30$Richness,
simpson_df = alpha_rm30$Inverse_Simpson,
exp_shannon_df = alpha_rm30$Shannon) %>%
mutate(fraction = factor(fraction, levels = c("WholePart", "Particle", "WholeFree", "Free")),
lakesite = factor(lakesite, levels = c("MOT", "MDP", "MBR", "MIN")),
measure = factor(measure, levels = c("Richness", "Exponential_Shannon", "Inverse_Simpson")),
Removed = "30-tons")
################## Remove 60-tons
alpha_rm60 <- calc_alpha_diversity(physeq = notree_musk_surface_pruned_rm60)
otu_alphadiv_rm60 <- calc_mean_alphadiv(physeq = notree_musk_surface_pruned_rm60,
richness_df = alpha_rm60$Richness,
simpson_df = alpha_rm60$Inverse_Simpson,
exp_shannon_df = alpha_rm60$Shannon) %>%
mutate(fraction = factor(fraction, levels = c("WholePart", "Particle", "WholeFree", "Free")),
lakesite = factor(lakesite, levels = c("MOT", "MDP", "MBR", "MIN")),
measure = factor(measure, levels = c("Richness", "Exponential_Shannon", "Inverse_Simpson")),
Removed = "60-tons")
################## Remove 90-tons
alpha_rm90 <- calc_alpha_diversity(physeq = notree_musk_surface_pruned_rm90)
otu_alphadiv_rm90 <- calc_mean_alphadiv(physeq = notree_musk_surface_pruned_rm90,
richness_df = alpha_rm90$Richness,
simpson_df = alpha_rm90$Inverse_Simpson,
exp_shannon_df = alpha_rm90$Shannon) %>%
mutate(fraction = factor(fraction, levels = c("WholePart", "Particle", "WholeFree", "Free")),
lakesite = factor(lakesite, levels = c("MOT", "MDP", "MBR", "MIN")),
measure = factor(measure, levels = c("Richness", "Exponential_Shannon", "Inverse_Simpson")),
Removed = "90-tons")
################## Remove 90-tons
alpha_rm150 <- calc_alpha_diversity(physeq = notree_musk_surface_pruned_rm150)
otu_alphadiv_rm150 <- calc_mean_alphadiv(physeq = notree_musk_surface_pruned_rm150,
richness_df = alpha_rm150$Richness,
simpson_df = alpha_rm150$Inverse_Simpson,
exp_shannon_df = alpha_rm150$Shannon) %>%
mutate(fraction = factor(fraction, levels = c("WholePart", "Particle", "WholeFree", "Free")),
lakesite = factor(lakesite, levels = c("MOT", "MDP", "MBR", "MIN")),
measure = factor(measure, levels = c("Richness", "Exponential_Shannon", "Inverse_Simpson")),
Removed = "150-tons")
################## Remove 300-tons
alpha_rm225 <- calc_alpha_diversity(physeq = notree_musk_surface_pruned_rm225)
otu_alphadiv_rm225 <- calc_mean_alphadiv(physeq = notree_musk_surface_pruned_rm225,
richness_df = alpha_rm225$Richness,
simpson_df = alpha_rm225$Inverse_Simpson,
exp_shannon_df = alpha_rm225$Shannon) %>%
mutate(fraction = factor(fraction, levels = c("WholePart", "Particle", "WholeFree", "Free")),
lakesite = factor(lakesite, levels = c("MOT", "MDP", "MBR", "MIN")),
measure = factor(measure, levels = c("Richness", "Exponential_Shannon", "Inverse_Simpson")),
Removed = "225-tons")
################## Remove 300-tons
alpha_rm300 <- calc_alpha_diversity(physeq = notree_musk_surface_pruned_rm300)
otu_alphadiv_rm300 <- calc_mean_alphadiv(physeq = notree_musk_surface_pruned_rm300,
richness_df = alpha_rm300$Richness,
simpson_df = alpha_rm300$Inverse_Simpson,
exp_shannon_df = alpha_rm300$Shannon) %>%
mutate(fraction = factor(fraction, levels = c("WholePart", "Particle", "WholeFree", "Free")),
lakesite = factor(lakesite, levels = c("MOT", "MDP", "MBR", "MIN")),
measure = factor(measure, levels = c("Richness", "Exponential_Shannon", "Inverse_Simpson")),
Removed = "300-tons")
min_seqs <- c(min(sample_sums(notree_musk_surface_pruned_rm1)) - 1, min(sample_sums(notree_musk_surface_pruned_rm5)) - 1,
min(sample_sums(notree_musk_surface_pruned_rm10)) - 1, min(sample_sums(notree_musk_surface_pruned_rm20)) - 1,
min(sample_sums(notree_musk_surface_pruned_rm30)) - 1,
min(sample_sums(notree_musk_surface_pruned_rm60)) - 1, min(sample_sums(notree_musk_surface_pruned_rm90)) - 1,
min(sample_sums(notree_musk_surface_pruned_rm150)) - 1, min(sample_sums(notree_musk_surface_pruned_rm225)) - 1,
min(sample_sums(notree_musk_surface_pruned_rm300)) - 1)
num_otus <- c(ncol(otu_table(notree_musk_surface_pruned_rm1)), ncol(otu_table(notree_musk_surface_pruned_rm5)), ncol(otu_table(notree_musk_surface_pruned_rm10)),
ncol(otu_table(notree_musk_surface_pruned_rm20)),
ncol(otu_table(notree_musk_surface_pruned_rm30)), ncol(otu_table(notree_musk_surface_pruned_rm60)), ncol(otu_table(notree_musk_surface_pruned_rm90)),
ncol(otu_table(notree_musk_surface_pruned_rm150)), ncol(otu_table(notree_musk_surface_pruned_rm225)), ncol(otu_table(notree_musk_surface_pruned_rm300)))
Removed <- c("1-tons","5-tons", "10-tons", "20-tons", "30-tons", "60-tons", "90-tons", "150-tons", "225-tons","300-tons")
statz <- data.frame(cbind(as.numeric(min_seqs), as.numeric(num_otus), Removed)) %>%
mutate(Removed = factor(Removed, levels = c("1-tons","5-tons", "10-tons", "20-tons", "30-tons", "60-tons",
"90-tons", "150-tons", "225-tons","300-tons"))) %>%
mutate(num_otus = as.numeric(num_otus))
p1 <- ggplot(statz, aes(x = Removed, y = min_seqs, fill = Removed)) +
geom_bar(stat = "identity") + ylab("Minimum # of Sequences") +
scale_y_continuous(expand = c(0,0), limits = c(0, 8000)) +
scale_color_manual(values = tons_colors) +
scale_fill_manual(values = tons_colors) +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
legend.position = "none")
p2 <-ggplot(statz, aes(x = Removed, y = num_otus, fill = Removed)) +
geom_bar(stat = "identity") + ylab("Richness") +
scale_y_continuous(expand = c(0,0)) +
scale_color_manual(values = tons_colors) +
scale_fill_manual(values = tons_colors) +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
legend.position = c(0.8, 0.65), legend.title = element_blank())
plot_grid(p1, p2, align = "v", labels = c("A", "B"), nrow = 2, ncol =1)
### 15-tons analysis
notree_musk_surface_pruned_rm15 <- prune_taxa(taxa_sums(notree_musk_surface_pruned) > 15, notree_musk_surface_pruned)
alpha_rm15 <- calc_alpha_diversity(physeq = notree_musk_surface_pruned_rm15)
otu_alphadiv_rm15 <- calc_mean_alphadiv(physeq = notree_musk_surface_pruned_rm15,
richness_df = alpha_rm15$Richness,
simpson_df = alpha_rm15$Inverse_Simpson,
exp_shannon_df = alpha_rm15$Shannon) %>%
mutate(fraction = factor(fraction, levels = c("WholePart", "Particle", "WholeFree", "Free")),
lakesite = factor(lakesite, levels = c("MOT", "MDP", "MBR", "MIN")),
measure = factor(measure, levels = c("Richness", "Exponential_Shannon", "Inverse_Simpson")),
Removed = "15-tons")
### To test for per-capita production
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv_rm15, measure == "Richness" & fraction == "WholePart")))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv_rm15,
## measure == "Richness" & fraction == "WholePart"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.77022 -0.22302 -0.06991 0.19827 0.70698
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.346686 1.018887 -9.173 7.3e-06 ***
## mean 0.007120 0.002746 2.593 0.0291 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4245 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.4276, Adjusted R-squared: 0.364
## F-statistic: 6.723 on 1 and 9 DF, p-value: 0.02908
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv_rm15, measure == "Inverse_Simpson" & fraction == "WholePart")))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv_rm15,
## measure == "Inverse_Simpson" & fraction == "WholePart"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.37066 -0.17949 -0.01083 0.06423 0.58447
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.447388 0.178394 -41.747 1.29e-11 ***
## mean 0.024230 0.005153 4.703 0.00112 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3018 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.7107, Adjusted R-squared: 0.6786
## F-statistic: 22.11 on 1 and 9 DF, p-value: 0.001116
### 25-tons analysis
notree_musk_surface_pruned_rm25 <- prune_taxa(taxa_sums(notree_musk_surface_pruned) > 25, notree_musk_surface_pruned)
alpha_rm25 <- calc_alpha_diversity(physeq = notree_musk_surface_pruned_rm25)
otu_alphadiv_rm25 <- calc_mean_alphadiv(physeq = notree_musk_surface_pruned_rm25,
richness_df = alpha_rm25$Richness,
simpson_df = alpha_rm25$Inverse_Simpson,
exp_shannon_df = alpha_rm25$Shannon) %>%
mutate(fraction = factor(fraction, levels = c("WholePart", "Particle", "WholeFree", "Free")),
lakesite = factor(lakesite, levels = c("MOT", "MDP", "MBR", "MIN")),
measure = factor(measure, levels = c("Richness", "Exponential_Shannon", "Inverse_Simpson")),
Removed = "25-tons")
summary(lm(frac_bacprod ~ mean, data = filter(otu_alphadiv_rm25, measure == "Richness" & fraction == "WholePart")))
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(otu_alphadiv_rm25,
## measure == "Richness" & fraction == "WholePart"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.3718 -3.4614 -0.2738 3.5805 13.6800
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -41.55935 22.49609 -1.847 0.0944 .
## mean 0.16481 0.07168 2.299 0.0443 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.992 on 10 degrees of freedom
## Multiple R-squared: 0.3458, Adjusted R-squared: 0.2804
## F-statistic: 5.287 on 1 and 10 DF, p-value: 0.0443
summary(lm(frac_bacprod ~ mean, data = filter(otu_alphadiv_rm25, measure == "Inverse_Simpson" & fraction == "WholePart")))
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(otu_alphadiv_rm25,
## measure == "Inverse_Simpson" & fraction == "WholePart"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.6461 -1.3567 -0.4754 0.6347 7.9761
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.45221 2.76293 -0.888 0.39563
## mean 0.43118 0.08444 5.106 0.00046 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.552 on 10 degrees of freedom
## Multiple R-squared: 0.7228, Adjusted R-squared: 0.6951
## F-statistic: 26.07 on 1 and 10 DF, p-value: 0.0004598
# Combine all div metrics
all_divs <- bind_rows(otu_alphadiv_rm1, otu_alphadiv_rm5, otu_alphadiv_rm10, otu_alphadiv_rm20, otu_alphadiv_rm30,
otu_alphadiv_rm60, otu_alphadiv_rm90, otu_alphadiv_rm150,
otu_alphadiv_rm225, otu_alphadiv_rm300) %>%
dplyr::filter(fraction %in% c("WholePart", "WholeFree") & year == "2015") %>%
mutate(fraction = fct_recode(fraction, "Particle" = "WholePart", "Free" = "WholeFree")) %>%
mutate(Removed = factor(Removed, levels = c("1-tons","5-tons", "10-tons", "20-tons", "30-tons", "60-tons",
"90-tons", "150-tons", "225-tons","300-tons")))
### PLOT
ggplot(dplyr::filter(all_divs, measure == "Richness"),
aes(y = mean, x = Removed, color = Removed, fill = Removed)) +
geom_boxplot(alpha = 0.3, outlier.shape = NA) + geom_point(size = 3, position = position_jitter(w = 0.1)) +
scale_color_manual(values = tons_colors) +
scale_fill_manual(values = tons_colors) +
facet_grid(.~fraction) +
ylab("Mean Richness") +
theme(legend.position = "bottom", legend.title = element_blank(),
axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
axis.title.x = element_blank())
ggplot(dplyr::filter(all_divs, measure == "Richness"),
aes(y = mean, x = fraction, color = Removed, fill = Removed)) +
geom_boxplot(alpha = 0.3, outlier.shape = NA) + geom_point(size = 3, position = position_jitter(w = 0.1)) +
scale_color_manual(values = tons_colors) +
scale_fill_manual(values = tons_colors) +
facet_grid(.~Removed) +
ylab("Mean Richness") +
theme(legend.position = "bottom", legend.title = element_blank(),
axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
axis.title.x = element_blank())
# Linear Model output
richness_lm_results <- lm_fraction_output(dataframe = dplyr::filter(all_divs, measure == "Richness"))
sig_rich_lms_df <- richness_lm_results %>%
bind_rows() %>%
mutate(diversity_metric = "Richness") %>%
filter(pval < 0.05)
# Display significant models in a dataframe
datatable(sig_rich_lms_df, options = list(pageLength = 40))
### Community-Wide Production vs Richness
sig_rich_lms_comm <- sig_rich_lms_df %>%
filter(test == "Community-Wide Production" & fraction == "Particle") %>%
dplyr::select(Removed) %>%
.$Removed
ggplot(dplyr::filter(all_divs, measure == "Richness"),
aes(y = frac_bacprod, x = mean, color = Removed, fill = Removed)) +
geom_point(size = 3) + xlab("Richness") +
ylab("Community-Wide Production") +
geom_smooth(method = "lm", data = filter(all_divs,
measure == "Richness" & fraction == "Particle" & Removed %in% sig_rich_lms_comm)) +
scale_color_manual(values = tons_colors) +scale_fill_manual(values = tons_colors) +
facet_grid(fraction~Removed, scales = "free") +
theme(legend.position = "bottom", legend.title = element_blank(),
axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))
### Per-capita production vs Richness
sig_rich_lms_percap <- sig_rich_lms_df %>%
filter(test == "Per-Capita Production") %>%
dplyr::select(Removed) %>%
.$Removed
ggplot(dplyr::filter(all_divs, measure == "Richness"),
aes(y = log10(fracprod_per_cell_noinf), x = mean, color = Removed, fill = Removed)) +
geom_point(size = 3) + xlab("Richness") +
ylab("log10(Per-Capita Production)") +
geom_smooth(method = "lm", data = filter(all_divs,
measure == "Richness" & fraction == "Particle" & Removed %in% sig_rich_lms_percap)) +
scale_color_manual(values = tons_colors) + scale_fill_manual(values = tons_colors) +
facet_grid(fraction~Removed, scales = "free") +
theme(legend.position = "bottom", legend.title = element_blank(),
axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))
### PLOT
ggplot(dplyr::filter(all_divs, measure == "Exponential_Shannon"),
aes(y = mean, x = Removed, color = Removed, fill = Removed)) +
geom_boxplot(alpha = 0.3, outlier.shape = NA) + geom_point(size = 3, position = position_jitter(w = 0.1)) +
scale_color_manual(values = tons_colors) +
scale_fill_manual(values = tons_colors) +
facet_grid(.~fraction) +
ylab("Mean Exp(Shannon Entropy)") +
theme(legend.position = "bottom", legend.title = element_blank(),
axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
axis.title.x = element_blank())
ggplot(dplyr::filter(all_divs, measure == "Exponential_Shannon"),
aes(y = mean, x = fraction, color = Removed, fill = Removed)) +
geom_boxplot(alpha = 0.3, outlier.shape = NA) + geom_point(size = 3, position = position_jitter(w = 0.1)) +
scale_color_manual(values = tons_colors) +
scale_fill_manual(values = tons_colors) +
facet_grid(.~Removed) +
ylab("Mean Exp(Shannon Entropy)") +
theme(legend.position = "bottom", legend.title = element_blank(),
axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
axis.title.x = element_blank())
# Linear Model output
shannon_lm_results <- lm_fraction_output(dataframe = dplyr::filter(all_divs, measure == "Exponential_Shannon"))
sig_shannon_lms_df <- shannon_lm_results %>%
bind_rows() %>%
mutate(diversity_metric = "Exponential_Shannon") %>%
filter(pval < 0.05)
# Display significant models in a table
datatable(sig_shannon_lms_df, options = list(pageLength = 40))
### Community-Wide Production vs Shannon Entropy
sig_shannon_lms_comm <- sig_shannon_lms_df %>%
filter(test == "Community-Wide Production" & fraction == "Particle") %>%
dplyr::select(Removed) %>%
.$Removed
### Community-Wide production vs Shannon Entropy
ggplot(dplyr::filter(all_divs, measure == "Exponential_Shannon"),
aes(y = frac_bacprod, x = mean, color = Removed, fill = Removed)) +
geom_point(size = 3) +
xlab("Shannon Entropy") +
ylab("Bacterial Production by Fraction") +
geom_smooth(method = "lm",
data = filter(all_divs, measure == "Exponential_Shannon" & fraction == "Particle" & Removed %in% sig_shannon_lms_comm)) +
scale_color_manual(values = tons_colors) +
scale_fill_manual(values = tons_colors) +
facet_grid(fraction~Removed, scales = "free") +
theme(legend.position = "bottom", legend.title = element_blank(),
axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))
### Per-Capita Production vs Shannon Entropy
sig_shannon_lms_percap <- sig_shannon_lms_df %>%
filter(test == "Per-Capita Production" & fraction == "Particle") %>%
dplyr::select(Removed) %>%
.$Removed
### Per-capita production vs Shannon Entropy
ggplot(dplyr::filter(all_divs, measure == "Exponential_Shannon"),
aes(y = log10(fracprod_per_cell_noinf), x = mean, color = Removed, fill = Removed)) +
geom_point(size = 3) + xlab("Shannon Entropy") +
ylab("log10(Per-Capita Production)") +
geom_smooth(method = "lm",
data = filter(all_divs, measure == "Exponential_Shannon" & fraction == "Particle" & Removed %in% sig_shannon_lms_percap)) +
scale_color_manual(values = tons_colors) + scale_fill_manual(values = tons_colors) +
facet_grid(fraction~Removed, scales = "free") +
theme(legend.position = "bottom", legend.title = element_blank(),
axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))
### PLOT
ggplot(dplyr::filter(all_divs, measure == "Inverse_Simpson"),
aes(y = mean, x = Removed, color = Removed, fill = Removed)) +
geom_boxplot(alpha = 0.3, outlier.shape = NA) + geom_point(size = 3, position = position_jitter(w = 0.1)) +
scale_color_manual(values = tons_colors) +
scale_fill_manual(values = tons_colors) +
facet_grid(.~fraction) +
ylab("Mean Inverse_Simpson") +
theme(legend.position = "bottom", legend.title = element_blank(),
axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
axis.title.x = element_blank())
ggplot(dplyr::filter(all_divs, measure == "Inverse_Simpson"),
aes(y = mean, x = fraction, color = Removed, fill = Removed)) +
geom_boxplot(alpha = 0.3, outlier.shape = NA) + geom_point(size = 3, position = position_jitter(w = 0.1)) +
scale_color_manual(values = tons_colors) +
scale_fill_manual(values = tons_colors) +
facet_grid(.~Removed) +
ylab("Mean Inverse_Simpson") +
theme(legend.position = "bottom", legend.title = element_blank(),
axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),
axis.title.x = element_blank())
# Linear Model output
invsimps_lm_results <- lm_fraction_output(dataframe = dplyr::filter(all_divs, measure == "Inverse_Simpson"))
sig_invsimps_lms_df <- invsimps_lm_results %>%
bind_rows() %>%
mutate(diversity_metric = "Inverse_Simpson") %>%
filter(pval < 0.05)
# Display significant models in a dataframe
datatable(sig_invsimps_lms_df, options = list(pageLength = 40))
### Community-Wide Production vs Inverse Simpson
sig_invsimps_lms_comm <- sig_invsimps_lms_df %>%
filter(test == "Community-Wide Production" & fraction == "Particle") %>%
dplyr::select(Removed) %>%
.$Removed
### Community Wide production vs Inverse Simpson
ggplot(dplyr::filter(all_divs, measure == "Inverse_Simpson"),
aes(y = frac_bacprod, x = mean, color = Removed, fill = Removed)) +
geom_point(size = 3) + xlab("Inverse Simpson") +
ylab("Community-Wide Production") +
geom_smooth(method = "lm",
data = filter(all_divs, measure == "Inverse_Simpson" & fraction == "Particle" & Removed %in% sig_invsimps_lms_comm)) +
scale_color_manual(values = tons_colors) +
scale_fill_manual(values = tons_colors) +
facet_grid(fraction~Removed, scales = "free") +
theme(legend.position = "bottom", legend.title = element_blank(),
axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))
### Per-Capita Production vs Inverse Simpson
sig_invsimps_lms_percap <- sig_invsimps_lms_df %>%
filter(test == "Per-Capita Production" & fraction == "Particle") %>%
dplyr::select(Removed) %>%
.$Removed
### Per-capita production vs Inverse Simpson
ggplot(dplyr::filter(all_divs, measure == "Inverse_Simpson"),
aes(y = log10(fracprod_per_cell_noinf), x = mean, color = Removed, fill = Removed)) +
geom_point(size = 3) + xlab("Inverse Simpson") +
ylab("log10(Per-Capita Production)") +
geom_smooth(method = "lm",
data = filter(all_divs, measure == "Inverse_Simpson" & fraction == "Particle" & Removed %in% sig_invsimps_lms_percap)) +
scale_color_manual(values = tons_colors) + scale_fill_manual(values = tons_colors) +
facet_grid(fraction~Removed, scales = "free") +
theme(legend.position = "bottom", legend.title = element_blank(),
axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))
p1 <- ggplot(dplyr::filter(all_divs, measure == "Richness" & fraction == "Particle"),
aes(y = frac_bacprod, x = mean, color = Removed, fill = Removed)) +
geom_point(size = 3) + xlab("Richness") +
ylab("Community-Wide \nProduction") +
geom_smooth(method = "lm", data = filter(all_divs,
measure == "Richness" & fraction == "Particle" & Removed %in% sig_rich_lms_comm)) +
scale_color_manual(values = tons_colors) +scale_fill_manual(values = tons_colors) +
facet_grid(~Removed, scales = "free") +
theme(legend.position = "none", legend.title = element_blank(),
axis.text.x = element_blank(),axis.title.x = element_blank())
p2 <- ggplot(dplyr::filter(all_divs, measure == "Richness" & fraction == "Particle"),
aes(y = log10(fracprod_per_cell_noinf), x = mean, color = Removed, fill = Removed)) +
geom_point(size = 3) + xlab("Richness") +
ylab("log10(Per-Capita \nProduction)") +
geom_smooth(method = "lm", data = filter(all_divs,
measure == "Richness" & fraction == "Particle" & Removed %in% sig_rich_lms_percap)) +
scale_color_manual(values = tons_colors) + scale_fill_manual(values = tons_colors) +
facet_grid(~Removed, scales = "free") +
theme(legend.position = "none", legend.title = element_blank(),
axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1, size = 10),
axis.title.x = element_text(face = "bold"))
p3 <- ggplot(dplyr::filter(all_divs, measure == "Inverse_Simpson" & fraction == "Particle"),
aes(y = frac_bacprod, x = mean, color = Removed, fill = Removed)) +
geom_point(size = 3) + xlab("Inverse Simpson") +
ylab("Community-Wide \nProduction") +
geom_smooth(method = "lm",
data = filter(all_divs, measure == "Inverse_Simpson" & fraction == "Particle" & Removed %in% sig_invsimps_lms_comm)) +
scale_color_manual(values = tons_colors) +
scale_fill_manual(values = tons_colors) +
facet_grid(~Removed, scales = "free") +
theme(legend.position = "none", legend.title = element_blank(),
axis.text.x = element_blank(),axis.title.x = element_blank())
p4 <- ggplot(dplyr::filter(all_divs, measure == "Inverse_Simpson" & fraction == "Particle"),
aes(y = log10(fracprod_per_cell_noinf), x = mean, color = Removed, fill = Removed)) +
geom_point(size = 3) + xlab("Inverse Simpson") +
ylab("log10(Per-Capita \nProduction)") +
geom_smooth(method = "lm",
data = filter(all_divs, measure == "Inverse_Simpson" & fraction == "Particle" & Removed %in% sig_invsimps_lms_percap)) +
scale_color_manual(values = tons_colors) + scale_fill_manual(values = tons_colors) +
facet_grid(~Removed, scales = "free") +
theme(legend.position = "none", legend.title = element_blank(),
axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1, size = 10),
axis.title.x = element_text(face = "bold"))
p5 <- ggplot(dplyr::filter(all_divs, measure == "Exponential_Shannon" & fraction == "Particle"),
aes(y = frac_bacprod, x = mean, color = Removed, fill = Removed)) +
geom_point(size = 3) + xlab("Exponential Shannon") +
ylab("Community-Wide \nProduction") +
geom_smooth(method = "lm",
data = filter(all_divs, measure == "Exponential_Shannon" & fraction == "Particle" & Removed %in% sig_shannon_lms_comm)) +
scale_color_manual(values = tons_colors) +
scale_fill_manual(values = tons_colors) +
facet_grid(~Removed, scales = "free") +
theme(legend.position = "none", legend.title = element_blank(),
axis.text.x = element_blank(),axis.title.x = element_blank())
p6 <- ggplot(dplyr::filter(all_divs, measure == "Exponential_Shannon" & fraction == "Particle"),
aes(y = log10(fracprod_per_cell_noinf), x = mean, color = Removed, fill = Removed)) +
geom_point(size = 3) + xlab("Exponential Shannon") +
ylab("log10(Per-Capita \nProduction)") +
geom_smooth(method = "lm",
data = filter(all_divs, measure == "Exponential_Shannon" & fraction == "Particle" & Removed %in% sig_shannon_lms_percap)) +
scale_color_manual(values = tons_colors) + scale_fill_manual(values = tons_colors) +
facet_grid(~Removed, scales = "free") +
theme(legend.position = "none", legend.title = element_blank(),
axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1, size = 10),
axis.title.x = element_text(face = "bold"))
plot_grid(p1, p2, p5, p6, p3, p4,
nrow = 6, ncol = 1,
rel_heights = c(0.9, 1, 0.9, 1.2, 0.9, 1.2),
labels = c("A", "B", "C", "D", "E", "F"),
align = "v")
devtools::session_info() # This will include session info with all R package version information
## ─ Session info ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## setting value
## version R version 3.6.2 (2019-12-12)
## os macOS Mojave 10.14.6
## system x86_64, darwin15.6.0
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/Chicago
## date 2020-02-10
##
## ─ Packages ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## package * version date lib source
## abind 1.4-5 2016-07-21 [1] CRAN (R 3.6.0)
## ade4 1.7-13 2018-08-31 [1] CRAN (R 3.6.0)
## ape * 5.3 2019-03-17 [1] CRAN (R 3.6.0)
## assertthat 0.2.1 2019-03-21 [1] CRAN (R 3.6.0)
## backports 1.1.5 2019-10-02 [1] CRAN (R 3.6.0)
## Biobase 2.44.0 2019-05-02 [1] Bioconductor
## BiocGenerics 0.30.0 2019-05-02 [1] Bioconductor
## biomformat 1.12.0 2019-05-02 [1] Bioconductor
## Biostrings 2.52.0 2019-05-02 [1] Bioconductor
## boot * 1.3-23 2019-07-05 [1] CRAN (R 3.6.2)
## callr 3.3.0 2019-07-04 [1] CRAN (R 3.6.0)
## car * 3.0-6 2019-12-23 [1] CRAN (R 3.6.0)
## carData * 3.0-3 2019-11-16 [1] CRAN (R 3.6.0)
## cellranger 1.1.0 2016-07-27 [1] CRAN (R 3.6.0)
## cli 2.0.1 2020-01-08 [1] CRAN (R 3.6.0)
## cluster 2.1.0 2019-06-19 [1] CRAN (R 3.6.2)
## codetools 0.2-16 2018-12-24 [1] CRAN (R 3.6.2)
## colorspace 1.4-1 2019-03-18 [1] CRAN (R 3.6.0)
## cowplot * 1.0.0 2019-07-11 [1] CRAN (R 3.6.0)
## crayon 1.3.4 2017-09-16 [1] CRAN (R 3.6.0)
## crosstalk 1.0.0 2016-12-21 [1] CRAN (R 3.6.0)
## curl 4.3 2019-12-02 [1] CRAN (R 3.6.0)
## data.table 1.12.8 2019-12-09 [1] CRAN (R 3.6.0)
## desc 1.2.0 2018-05-01 [1] CRAN (R 3.6.0)
## devtools * 2.2.1 2019-09-24 [1] CRAN (R 3.6.0)
## digest 0.6.23 2019-11-23 [1] CRAN (R 3.6.0)
## dplyr * 0.8.3 2019-07-04 [1] CRAN (R 3.6.0)
## DT * 0.7 2019-06-11 [1] CRAN (R 3.6.0)
## ellipsis 0.3.0 2019-09-20 [1] CRAN (R 3.6.0)
## evaluate 0.14 2019-05-28 [1] CRAN (R 3.6.0)
## fansi 0.4.1 2020-01-08 [1] CRAN (R 3.6.0)
## farver 2.0.3 2020-01-16 [1] CRAN (R 3.6.0)
## forcats * 0.4.0 2019-02-17 [1] CRAN (R 3.6.0)
## foreach 1.4.4 2017-12-12 [1] CRAN (R 3.6.0)
## foreign 0.8-72 2019-08-02 [1] CRAN (R 3.6.2)
## fs 1.3.1 2019-05-06 [1] CRAN (R 3.6.0)
## ggplot2 * 3.2.1 2019-08-10 [1] CRAN (R 3.6.0)
## glue 1.3.1 2019-03-12 [1] CRAN (R 3.6.0)
## gtable 0.3.0 2019-03-25 [1] CRAN (R 3.6.0)
## haven 2.2.0 2019-11-08 [1] CRAN (R 3.6.0)
## hms 0.5.3 2020-01-08 [1] CRAN (R 3.6.0)
## htmltools 0.4.0 2019-10-04 [1] CRAN (R 3.6.0)
## htmlwidgets 1.5.1 2019-10-08 [1] CRAN (R 3.6.0)
## httpuv 1.5.1 2019-04-05 [1] CRAN (R 3.6.0)
## httr 1.4.0 2018-12-11 [1] CRAN (R 3.6.0)
## igraph 1.2.4.2 2019-11-27 [1] CRAN (R 3.6.0)
## IRanges 2.18.1 2019-05-31 [1] Bioconductor
## iterators 1.0.10 2018-07-13 [1] CRAN (R 3.6.0)
## jsonlite 1.6 2018-12-07 [1] CRAN (R 3.6.0)
## kableExtra * 1.1.0 2019-03-16 [1] CRAN (R 3.6.0)
## knitr 1.27 2020-01-16 [1] CRAN (R 3.6.0)
## labeling 0.3 2014-08-23 [1] CRAN (R 3.6.0)
## later 0.8.0 2019-02-11 [1] CRAN (R 3.6.0)
## lattice * 0.20-38 2018-11-04 [1] CRAN (R 3.6.2)
## lazyeval 0.2.2 2019-03-15 [1] CRAN (R 3.6.0)
## lifecycle 0.1.0 2019-08-01 [1] CRAN (R 3.6.0)
## magrittr 1.5 2014-11-22 [1] CRAN (R 3.6.0)
## MASS * 7.3-51.4 2019-03-31 [1] CRAN (R 3.6.2)
## Matrix 1.2-18 2019-11-27 [1] CRAN (R 3.6.2)
## memoise 1.1.0 2017-04-21 [1] CRAN (R 3.6.0)
## mgcv 1.8-31 2019-11-09 [1] CRAN (R 3.6.2)
## mime 0.8 2019-12-19 [1] CRAN (R 3.6.0)
## multtest 2.40.0 2019-05-02 [1] Bioconductor
## munsell 0.5.0 2018-06-12 [1] CRAN (R 3.6.0)
## nlme * 3.1-142 2019-11-07 [1] CRAN (R 3.6.2)
## openxlsx 4.1.4 2019-12-06 [1] CRAN (R 3.6.0)
## permute * 0.9-5 2019-03-12 [1] CRAN (R 3.6.0)
## phyloseq * 1.28.0 2019-05-02 [1] Bioconductor
## picante * 1.8 2019-03-21 [1] CRAN (R 3.6.0)
## pillar 1.4.3 2019-12-20 [1] CRAN (R 3.6.0)
## pkgbuild 1.0.3 2019-03-20 [1] CRAN (R 3.6.0)
## pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 3.6.0)
## pkgload 1.0.2 2018-10-29 [1] CRAN (R 3.6.0)
## plyr 1.8.5 2019-12-10 [1] CRAN (R 3.6.0)
## prettyunits 1.1.1 2020-01-24 [1] CRAN (R 3.6.0)
## processx 3.4.0 2019-07-03 [1] CRAN (R 3.6.0)
## promises 1.0.1 2018-04-13 [1] CRAN (R 3.6.0)
## ps 1.3.0 2018-12-21 [1] CRAN (R 3.6.0)
## purrr 0.3.3 2019-10-18 [1] CRAN (R 3.6.0)
## R6 2.4.1 2019-11-12 [1] CRAN (R 3.6.0)
## Rcpp 1.0.3 2019-11-08 [1] CRAN (R 3.6.0)
## readr 1.3.1 2018-12-21 [1] CRAN (R 3.6.0)
## readxl 1.3.1 2019-03-13 [1] CRAN (R 3.6.0)
## remotes 2.1.0 2019-06-24 [1] CRAN (R 3.6.0)
## reshape2 1.4.3 2017-12-11 [1] CRAN (R 3.6.0)
## rhdf5 2.28.0 2019-05-02 [1] Bioconductor
## Rhdf5lib 1.6.0 2019-05-02 [1] Bioconductor
## rio 0.5.16 2018-11-26 [1] CRAN (R 3.6.0)
## rlang 0.4.2 2019-11-23 [1] CRAN (R 3.6.0)
## rmarkdown 1.13 2019-05-22 [1] CRAN (R 3.6.0)
## rprojroot 1.3-2 2018-01-03 [1] CRAN (R 3.6.0)
## rstudioapi 0.10 2019-03-19 [1] CRAN (R 3.6.0)
## rvest 0.3.4 2019-05-15 [1] CRAN (R 3.6.0)
## S4Vectors 0.22.0 2019-05-02 [1] Bioconductor
## sandwich * 2.5-1 2019-04-06 [1] CRAN (R 3.6.0)
## scales 1.1.0 2019-11-18 [1] CRAN (R 3.6.0)
## sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 3.6.0)
## shiny 1.3.2 2019-04-22 [1] CRAN (R 3.6.0)
## stringi 1.4.5 2020-01-11 [1] CRAN (R 3.6.0)
## stringr 1.4.0 2019-02-10 [1] CRAN (R 3.6.0)
## survival 3.1-8 2019-12-03 [1] CRAN (R 3.6.2)
## testthat 2.1.1 2019-04-23 [1] CRAN (R 3.6.0)
## tibble 2.1.3 2019-06-06 [1] CRAN (R 3.6.0)
## tidyr * 1.0.2 2020-01-24 [1] CRAN (R 3.6.0)
## tidyselect 0.2.5 2018-10-11 [1] CRAN (R 3.6.0)
## usethis * 1.5.1 2019-07-04 [1] CRAN (R 3.6.0)
## vctrs 0.2.1 2019-12-17 [1] CRAN (R 3.6.0)
## vegan * 2.5-6 2019-09-01 [1] CRAN (R 3.6.0)
## viridisLite 0.3.0 2018-02-01 [1] CRAN (R 3.6.0)
## webshot 0.5.2 2019-11-22 [1] CRAN (R 3.6.0)
## withr 2.1.2 2018-03-15 [1] CRAN (R 3.6.0)
## xfun 0.12 2020-01-13 [1] CRAN (R 3.6.0)
## xml2 1.2.0 2018-01-24 [1] CRAN (R 3.6.0)
## xtable 1.8-4 2019-04-21 [1] CRAN (R 3.6.0)
## XVector 0.24.0 2019-05-02 [1] Bioconductor
## yaml 2.2.0 2018-07-25 [1] CRAN (R 3.6.0)
## zeallot 0.1.0 2018-01-28 [1] CRAN (R 3.6.0)
## zip 2.0.4 2019-09-01 [1] CRAN (R 3.6.0)
## zlibbioc 1.30.0 2019-05-02 [1] Bioconductor
## zoo 1.8-7 2020-01-10 [1] CRAN (R 3.6.0)
##
## [1] /Library/Frameworks/R.framework/Versions/3.6/Resources/library