Load Libraries & Set Themes

library(devtools)   # Reproducibility (see end of file)
library(phyloseq)   # Easier data manipulation  
library(tidyverse)  # Pretty plotting and data manipulation 
library(forcats)    # Recoding factors
library(cowplot)    # Multiple plotting
library(d3heatmap)  # For nice heatmaps
library(phytools)   # Working with phylogenetic trees
library(ggtree)     # Pretty ggplot-esque phylogenetic trees

## Error: package or namespace load failed for 'ggtree':
##  object 'as_data_frame' is not exported by 'namespace:tidytree'

library(ape)        # Working with phylogenetic trees
library(DT)         # Pretty Tables 
library(gplots)     # Heatmap.2 function
library(stringr)    # To replace zeros in OTU names
library(adephylo)   # Testing phylogenetic autocorrelation
library(geiger)     # Phylogenetic signal 
source("code/set_colors.R")           # Set Colors for plotting

# Set some global ggplot parameters
mytheme <- theme(legend.text = element_text(size = 8), legend.title = element_text(size = 8, face = "bold"), 
                 plot.title = element_text(size = 10), legend.position = c(0.01, 0.9),
                 axis.title = element_text(size = 10, face = "bold"), axis.text = element_text(size = 8),
                 legend.key.width=unit(0.1,"line"), legend.key.height=unit(0.1,"line"))

Load FCM Data

# Read in the data 
raw_data <- read.table(file="data/Chloroplasts_removed/old/nochloro_HNA_LNA.tsv", header = TRUE) %>%
  mutate(norep_filter_name = paste(substr(Sample_16S,1,4), substr(Sample_16S,6,9), sep = "")) %>%
  arrange(norep_filter_name)

# Subset out only the Muskegon and Surface samples 
muskegon_data <- raw_data %>%
  dplyr::filter(Lake == "Muskegon" & Depth == "Surface") %>%
  dplyr::select(norep_filter_name)

# Load in the productivity data
production <- read.csv(file="data/production_data.csv", header = TRUE) %>%    
  dplyr::filter(fraction == "Free" & limnion == "Surface") %>%                         # Select only rows that are free-living
  dplyr::select(names, tot_bacprod, SD_tot_bacprod) %>%         # Select relevant columns for total productivity
  mutate(tot_bacprod = round(tot_bacprod, digits = 2),          # Round to 2 decimals
         SD_tot_bacprod = round(SD_tot_bacprod, digits = 2) ) %>%      
  dplyr::rename(norep_filter_name = names) %>%                  # Rename to match other data frame
  arrange(norep_filter_name)

# Stop if the names do not match
stopifnot(muskegon_data$norep_filter_name == production$norep_filter_name)

# Combine the two muskegon data frames into one
combined_data <- left_join(muskegon_data, production, by = "norep_filter_name")

# Merge the combined data back into the original data frame (data)
data <- left_join(raw_data, combined_data, by = "norep_filter_name") %>%
  dplyr::select(-c(Platform, Fraction)) %>%
  mutate(Lake = factor(Lake, levels = c("Michigan","Inland", "Muskegon")))
head(data)

##       samples Total.cells HNA.cells LNA.cells Total.count.sd    HNA.sd    LNA.sd     Lake Sample_16S Season     Month Year  Site Depth Total_Sequences norep_filter_name tot_bacprod SD_tot_bacprod
## 1 110D2-115-2    620420.9  139880.7  480540.2       1640.275  4639.952  5795.307 Michigan  110D2F115 Winter   Januari 2015 MM110  Deep           19654          110DF115          NA             NA
## 2 110D2-415-2    799314.0  155198.1  644115.9       9388.845  4198.741  5493.442 Michigan  110D2F415 Spring     April 2015 MM110  Deep            7951          110DF415          NA             NA
## 3   110D2-515   1261369.4  362443.6  898925.9      15341.779 10504.696  6299.495 Michigan  110D2F515 Spring       May 2015 MM110  Deep           16293          110DF515          NA             NA
## 4 110D2-715-2    542723.6  131132.1  411591.5       6139.696  2196.008  3967.958 Michigan  110D2F715 Summer      July 2015 MM110  Deep           16882          110DF715          NA             NA
## 5   110D2-815    548027.9  131820.5  416207.4       3552.010  3805.294  2441.935 Michigan  110D2F815 Summer    August 2015 MM110  Deep           22157          110DF815          NA             NA
## 6 110D2-915-2    731931.0  189746.2  542184.8      36726.396  6634.230 30473.925 Michigan  110D2F915   Fall September 2015 MM110  Deep           16500          110DF915          NA             NA

# Fix the metadata
data$Month[data$Month == "Januari"] <- "June"
data$Season[data$Season == "Winter"] <- "Summer"
data$Lake[data$Site == "MLB"] <- "Muskegon"

# Write out the file 
#write.table(data, file="data/Chloroplasts_removed/productivity_data.tsv", row.names=TRUE)

####### HELPFUL TRANSFORMED AND SUBSETTED DATA
##### Subset Muskegon Lake Data Only 
muskegon <- dplyr::filter(data, Lake == "Muskegon" & Depth == "Surface") %>%
  mutate(Site = factor(Site, levels = c("MOT", "MDP", "MBR", "MIN"))) %>%
  filter(tot_bacprod < 90)

# Gather the data for summary statistics 
df_cells <- data %>%
  dplyr::select(1:4, Lake, Season:Depth) %>%
  rename(Total = Total.cells, HNA = HNA.cells, LNA = LNA.cells) %>%
  gather(key = FCM_type, value = num_cells, Total:LNA)

Summary & Descriptive Statistics

# Number of cells in HNA and LNA across all samples 
    # For a plot of these values, see figure 1 below
df_cells %>%
  group_by(FCM_type) %>%
  summarise(num_samples = n(),
            mean_cells = round(mean(num_cells), digits = 2),
            sd_mean_cells = round(sd(num_cells), digits = 2),
            median_cells = round(median(num_cells), digits = 2)) %>%
  datatable(caption = "Mean and median number of cells per ecosystem", rownames = FALSE)

# Are there more total cells in one lake over the other?
totcells_df <- df_cells %>%
  filter(FCM_type == "Total")
# Compute the analysis of variance
totcells_aov <- aov(num_cells ~ Lake, data = totcells_df)
summary(totcells_aov) # there is a difference, but which lake?

##              Df    Sum Sq   Mean Sq F value Pr(>F)    
## Lake          2 6.995e+14 3.498e+14   83.84 <2e-16 ***
## Residuals   170 7.092e+14 4.172e+12                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Which samples are significant from each other?
TukeyHSD(totcells_aov) # Michigan is significantly different form Muskegon and Inland

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = num_cells ~ Lake, data = totcells_df)
## 
## $Lake
##                        diff      lwr       upr     p adj
## Inland-Michigan   4581227.7  3658141 5504314.6 0.0000000
## Muskegon-Michigan 4332278.2  3409191 5255365.0 0.0000000
## Muskegon-Inland   -248949.5 -1116299  618399.9 0.7762399

# Number of cells in HNA and LNA per ecosystem
df_cells %>%
  group_by(Lake, FCM_type) %>%
  summarise(num_samples = n(),
            mean_cells = round(mean(num_cells), digits = 2),
            median_cells = round(median(num_cells), digits = 2)) %>%
  datatable(caption = "Mean and median number of cells per ecosystem", rownames = FALSE)

# Proportion of HNA and LNA across all samples 
df_cells %>%
  dplyr::select(samples, Lake, FCM_type, num_cells) %>%
  spread(FCM_type, num_cells) %>%
  mutate(prop_HNA = HNA/Total * 100,
         prop_LNA = LNA/Total * 100) %>%
  dplyr::select(Lake, prop_HNA, prop_LNA) %>%
  summarize(mean_HNA = round(mean(prop_HNA), digits = 2), 
            sd_HNA = round(sd(prop_HNA), digits = 2),
            mean_LNA = round(mean(prop_LNA), digits = 2),
            sd_LNA = round(sd(prop_LNA), digits = 2))

##   mean_HNA sd_HNA mean_LNA sd_LNA
## 1    30.41   9.06    69.59   9.06

# Proportion of HNA and LNAper ecosystem
prop_stats <- df_cells %>%
  dplyr::select(samples, Lake, FCM_type, num_cells) %>%
  spread(FCM_type, num_cells) %>%
  mutate(prop_HNA = HNA/Total * 100,
         prop_LNA = LNA/Total * 100) %>%
  dplyr::select(Lake, prop_HNA, prop_LNA) %>%
  rename(HNA = prop_HNA, LNA = prop_LNA) %>%
  group_by(Lake) %>%
  summarize(mean_HNA = round(mean(HNA), digits = 2),
            mean_LNA = round(mean(LNA), digits = 2),
            min_HNA = round(min(HNA), digits = 2), 
            max_HNA = round(max(HNA), digits = 2), 
            min_LNA = round(min(LNA), digits = 2), 
            max_LNA = round(max(LNA), digits = 2), 
            num_samples = n())

datatable(prop_stats, caption = "Statistics of the percentage of each flow cytometry group across the systems", rownames = FALSE)

Number of cells per system

# Load in the data from each lake 
musk_all_df <- read.table(file="data/Chloroplasts_removed/ByLake_Filtering/5in10/muskegon/muskegon_sampledata_5in10.tsv", header = TRUE) %>%
  mutate(norep_filter_name = paste(substr(Sample_16S,1,4), substr(Sample_16S,6,9), sep = "")) %>%
  arrange(norep_filter_name)

mich_all_df <- read.table(file="data/Chloroplasts_removed/ByLake_Filtering/5in10/michigan/michigan_sampledata_5in10.tsv", header = TRUE) %>%
  mutate(norep_filter_name = paste(substr(Sample_16S,1,4), substr(Sample_16S,6,9), sep = "")) %>%
  arrange(norep_filter_name)

inla_all_df <- read.table(file="data/Chloroplasts_removed/ByLake_Filtering/5in10/inland/inland_sampledata_5in10.tsv", header = TRUE) %>%
  mutate(norep_filter_name = paste(substr(Sample_16S,1,4), substr(Sample_16S,6,9), sep = "")) %>%
  arrange(norep_filter_name)

stopifnot(colnames(musk_all_df) == colnames(mich_all_df))
stopifnot(colnames(musk_all_df) == colnames(inla_all_df))

lakes <- bind_rows(musk_all_df, mich_all_df, inla_all_df)

ggplot(df_cells, aes(x = FCM_type, y = num_cells, fill = Lake, color = Lake, shape = Lake)) + 
  geom_point(size = 1.5, position = position_jitterdodge(), color = "black") + 
  geom_boxplot(alpha = 0.7, outlier.shape = NA, show.legend = FALSE, color = "black") +
  scale_color_manual(values = lake_colors,  guide = "none") +
  scale_fill_manual(values = lake_colors) +
  scale_shape_manual(values = lake_shapes) +
  labs(y = "Number of Cells (cells/mL)", x = "FCM Type") +
  mytheme + theme(legend.title = element_blank(), legend.position = c(0.01, 0.95)) +
  guides(colour = guide_legend(override.aes = list(size=2.5)),
         shape = guide_legend(override.aes = list(size=2.5)),
         fill = guide_legend(override.aes = list(size=2.5)))

Figure 1

########################  Analysis of HNA/LNA/Total Cells vs Total Productivity
# 1. Run the linear model 
lm_HNA <- lm(tot_bacprod ~ HNA.cells, data = muskegon)
summary(lm_HNA) # Linear model for HNA cells vs bacterial production

## 
## Call:
## lm(formula = tot_bacprod ~ HNA.cells, data = muskegon)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -11.627  -6.351  -1.992   6.089  25.165 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.045e+01  7.426e+00  -1.407    0.176    
## HNA.cells    1.992e-05  3.290e-06   6.054 1.01e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.282 on 18 degrees of freedom
## Multiple R-squared:  0.6706, Adjusted R-squared:  0.6523 
## F-statistic: 36.65 on 1 and 18 DF,  p-value: 1.01e-05

## 2. Extract the R2 and p-value from the linear model: 
lm_HNA_stats <- paste("atop(R^2 ==", round(summary(lm_HNA)$adj.r.squared, digits = 2), ",",
                                    "p ==", round(unname(summary(lm_HNA)$coefficients[,4][2]), digits = 5), ")")

# 3. Plot it
HNA_vs_prod <- ggplot(muskegon, aes(x = HNA.cells, y = tot_bacprod)) + 
  geom_errorbarh(aes(xmin = HNA.cells - HNA.sd, xmax = HNA.cells + HNA.sd), color = "grey", alpha = 0.8) + 
  geom_errorbar(aes(ymin = tot_bacprod - SD_tot_bacprod, max = tot_bacprod + SD_tot_bacprod), color = "grey", alpha = 0.8) + 
  geom_point(size = 2, shape = 22, fill = "deepskyblue4") + 
  geom_smooth(method = "lm", color = "deepskyblue4") + 
  labs(y = "Bacterial Production \n (μg C/L/day)", x = "HNA Cell Density \n(cells/mL)") +
  scale_x_continuous(labels = function(x) format(x, scientific = TRUE), 
                     breaks = c(2e+06, 3e+06)) +
  annotate("text", x=1.65e+06, y=60, label=lm_HNA_stats, parse = TRUE, color = "black", size = 3) +
  mytheme

# 1. Run the linear model 
lm_LNA <- lm(tot_bacprod ~ LNA.cells, data = muskegon)
summary(lm_LNA) # Linear model for LNA cells vs bacterial production

## 
## Call:
## lm(formula = tot_bacprod ~ LNA.cells, data = muskegon)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -23.8108 -11.4310   0.8419  10.9937  23.7475 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 2.089e+01  1.185e+01   1.762    0.095 .
## LNA.cells   2.434e-06  2.331e-06   1.044    0.310  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.7 on 18 degrees of freedom
## Multiple R-squared:  0.05709,    Adjusted R-squared:  0.004709 
## F-statistic:  1.09 on 1 and 18 DF,  p-value: 0.3103

## 2. Extract the R2 and p-value from the linear model: 
lm_LNA_stats <- paste("atop(R^2 ==", round(summary(lm_LNA)$adj.r.squared, digits = 3), ",",
                      "p ==", round(unname(summary(lm_LNA)$coefficients[,4][2]), digits = 2), ")")

# 3. Plot it
LNA_vs_prod <- ggplot(muskegon, aes(x = LNA.cells, y = tot_bacprod)) + 
  geom_errorbarh(aes(xmin = LNA.cells - LNA.sd, xmax = LNA.cells + LNA.sd), color = "grey", alpha = 0.8) + 
  geom_errorbar(aes(ymin = tot_bacprod - SD_tot_bacprod, max = tot_bacprod + SD_tot_bacprod), color = "grey", alpha = 0.8) + 
  geom_point(size = 2.5, shape = 22, fill = "darkgoldenrod1") + 
  labs(y = "Bacterial Production \n (μg C/L/day)", x = "LNA Cell Density \n(cells/mL)") +
  #geom_smooth(method = "lm", se = FALSE, linetype = "longdash", color = "darkgoldenrod1") + 
  annotate("text", x=2.75e+06, y=60, label=lm_LNA_stats, parse = TRUE, color = "red", size = 3) +
  mytheme

# 1. Run the linear model 
lm_total <- lm(tot_bacprod ~ Total.cells, data = muskegon)
summary(lm_total) # Linear model for total cells vs bacterial production

## 
## Call:
## lm(formula = tot_bacprod ~ Total.cells, data = muskegon)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.413 -11.892   1.763  11.378  20.936 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 4.879e+00  1.254e+01   0.389    0.702  
## Total.cells 3.962e-06  1.727e-06   2.295    0.034 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.22 on 18 degrees of freedom
## Multiple R-squared:  0.2264, Adjusted R-squared:  0.1834 
## F-statistic: 5.267 on 1 and 18 DF,  p-value: 0.03397

## 2. Extract the R2 and p-value from the linear model: 
lm_total_stats <- paste("atop(R^2 ==", round(summary(lm_total)$adj.r.squared, digits = 2), ",",
                      "p ==", round(unname(summary(lm_total)$coefficients[,4][2]), digits = 2), ")")

# 3. Plot it 
Total_vs_prod <- ggplot(muskegon, aes(x = Total.cells, y = tot_bacprod)) + 
  geom_errorbarh(aes(xmin = Total.cells - Total.count.sd, xmax = Total.cells + Total.count.sd), color = "grey", alpha = 0.8) + 
  geom_errorbar(aes(ymin = tot_bacprod - SD_tot_bacprod, max = tot_bacprod + SD_tot_bacprod), color = "grey", alpha = 0.8) + 
  scale_shape_manual(values = lake_shapes) +
  geom_point(size = 2.5, shape = 22, fill = "black") + 
  labs(y = "Bacterial Production \n (μg C/L/day)", x = "Total Cell Density \n(cells/mL)") +
  geom_smooth(method = "lm", color = "black") + 
  #geom_smooth(method = "lm", se = FALSE, linetype = "longdash", color = "red") + 
  annotate("text", x=5.25e+06, y=60, label=lm_total_stats, parse = TRUE, color = "black", size = 3) +
  mytheme

###### ###### ###### ###### ###### ###### ###### ###### ###### ###### 
###### Correlation between HNA and LNA Across the three systems ###### 
## Is there a corrlation between HNA and LNA across ecosystems?
# 1. Run the linear model 
lm_allNA_corr <- lm(LNA.cells ~ HNA.cells, data = lakes)
summary(lm(LNA.cells ~ HNA.cells * Lake, data = lakes))

## 
## Call:
## lm(formula = LNA.cells ~ HNA.cells * Lake, data = lakes)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -3651006  -558525  -113725   511977  3997810 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             3.699e+06  3.590e+05  10.304  < 2e-16 ***
## HNA.cells               3.827e-01  1.704e-01   2.246    0.026 *  
## LakeMichigan           -2.995e+06  4.523e+05  -6.622 4.63e-10 ***
## LakeMuskegon           -2.363e+06  5.411e+05  -4.367 2.21e-05 ***
## HNA.cells:LakeMichigan  5.404e-01  4.257e-01   1.269    0.206    
## HNA.cells:LakeMuskegon  1.027e+00  2.549e-01   4.029 8.50e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1301000 on 167 degrees of freedom
## Multiple R-squared:  0.6116, Adjusted R-squared:    0.6 
## F-statistic:  52.6 on 5 and 167 DF,  p-value: < 2.2e-16

## 2. Extract the R2 and p-value from the linear model: 
lm_allNA_corr_stats <- paste("atop(R^2 ==", round(summary(lm_allNA_corr)$adj.r.squared, digits = 2), ",",
                          "p ==", round(unname(summary(lm_allNA_corr)$coefficients[,4][2]), digits = 24), ")")

# 3. Plot it
p2 <- ggplot(lakes, aes(x = HNA.cells, y = LNA.cells)) + 
  geom_errorbarh(aes(xmin = HNA.cells - HNA.sd, xmax = HNA.cells + HNA.sd), color = "grey", alpha = 0.8) + 
  geom_errorbar(aes(ymin = LNA.cells - LNA.sd, max = LNA.cells + LNA.sd), color = "grey", alpha = 0.8) + 
  geom_point(size = 2.5, alpha = 0.9, aes(fill = Lake, shape = Lake)) + 
  scale_fill_manual(values = lake_colors) +
  scale_shape_manual(values = lake_shapes) +
  geom_smooth(method = "lm", color = "black") + 
  labs(y = "LNA Cell Density (cells/mL)", x = "HNA Cell Density (cells/mL)") +
  scale_x_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 6.1e+06)) +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 1e+07)) +
  annotate("text", x=5e+06, y=0.8e+06, label=lm_allNA_corr_stats, parse = TRUE, color = "black", size = 3) +
  mytheme + theme(legend.position = "none") #theme(legend.title = element_blank(), legend.position = c(0.01, 0.95))

###### MAKE THE PLOT 
## Extract legends for plotting 
plot_for_legend <- 
  ggplot(df_cells, aes(x = Lake, y = num_cells, fill = FCM_type, color = FCM_type)) + 
  geom_point(size = 1, shape = 22, position = position_jitterdodge()) + 
  scale_fill_manual(values = fcm_colors) + 
  scale_color_manual(values = fcm_colors, guide = "none") + 
  scale_shape_manual(values = 22) +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE)) +
  labs(y = "Number of Cells \n (cells/mL)", x = "Lake", fill = "FCM") +
  theme(legend.position = "right",
           legend.title = element_text(size = 12, face = "bold"), 
           legend.text = element_text(size = 12),
           legend.key.width=unit(1,"line"), 
           legend.key.height=unit(1,"line")) +
  guides(fill = guide_legend(override.aes = list(size=3.5)))

legend1 <- get_legend(plot_for_legend)

legend2 <- get_legend(p2 + theme(legend.position = "right",
           legend.title = element_text(size = 12, face = "bold"), 
           legend.text = element_text(size = 12),
           legend.key.width=unit(1,"line"), 
           legend.key.height=unit(1,"line")) +
  guides(fill = guide_legend(override.aes = list(size=3.5))))

# place the legends
legend_positions <- plot_grid(legend2, legend1, nrow = 2, ncol = 1)

# Place the legends next to figure 1A
row1 <- plot_grid(NULL, p2, legend_positions, 
                        labels = c("", "A", ""),
                        ncol = 3, nrow = 1,
                        rel_widths = c(0.5, 1, 0.5))

# Add Figures 1B, 1C, and 1D
row2 <- plot_grid(HNA_vs_prod + theme(legend.position = "none"), 
          LNA_vs_prod + theme(axis.title.y = element_blank(), legend.position = "none"), 
          Total_vs_prod + theme(axis.title.y = element_blank(), legend.position = "none"),
          labels = c("B", "C", "D"), 
          ncol = 3, nrow = 1, 
          rel_widths = c(0.95, 0.8, 0.8))

# Final Figure 1
plot_grid(row1, row2,
          nrow = 2, ncol = 1)

Figure 1A: Plotted By Lake System

### MUSKEGON ONLY ANALYSIS
# 1. Run the linear model 
lm_muskNA_corr <- lm(LNA.cells ~ HNA.cells, data = musk_all_df)

## 2. Extract the R2 and p-value from the linear model: 
lm_muskNA_corr_stats <- paste("atop(R^2 ==", round(summary(lm_muskNA_corr)$adj.r.squared, digits = 2), ",",
                             "p ==", round(unname(summary(lm_muskNA_corr)$coefficients[,4][2]), digits = 9), ")")

musk_corr_plot <- ggplot(musk_all_df, aes(x = HNA.cells, y = LNA.cells)) + 
  geom_errorbarh(aes(xmin = HNA.cells - HNA.sd, xmax = HNA.cells + HNA.sd), color = "grey") + 
  geom_errorbar(aes(ymin = LNA.cells - LNA.sd, max = LNA.cells + LNA.sd), color = "grey") + 
  geom_point(size = 3, shape = 22, aes(fill = Lake)) + 
  geom_smooth(method = "lm", color = "black") + 
  ggtitle("Muskegon Lake") + scale_fill_manual(values = lake_colors) +
  scale_x_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 6.1e+06)) +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 1e+07)) +
  labs(y = "LNA Cell Density (cells/mL)", x = "HNA Cell Density (cells/mL)") +
  annotate("text", x=5e+06, y=0.8e+06, label=lm_muskNA_corr_stats, parse = TRUE, color = "black", size = 3) +
  mytheme

### INLAND ONLY ANALYSIS
# 1. Run the linear model 
lm_inlaNA_corr <- lm(LNA.cells ~ HNA.cells, data = filter(inla_all_df, Sample_16S != "Z14003F"))

## 2. Extract the R2 and p-value from the linear model: 
lm_inlaNA_corr_stats <- paste("atop(R^2 ==", round(summary(lm_inlaNA_corr)$adj.r.squared, digits = 2), ",",
                              "p ==", round(unname(summary(lm_inlaNA_corr)$coefficients[,4][2]), digits = 2), ")")

inla_corr_plot <- ggplot(filter(inla_all_df, Sample_16S != "Z14003F"), aes(x = HNA.cells, y = LNA.cells)) + 
  geom_errorbarh(aes(xmin = HNA.cells - HNA.sd, xmax = HNA.cells + HNA.sd), color = "grey") + 
  geom_errorbar(aes(ymin = LNA.cells - LNA.sd, max = LNA.cells + LNA.sd), color = "grey") + 
  geom_point(size = 3, shape = 22, aes(fill = Lake)) + 
  geom_smooth(method = "lm", color = "black") + 
  scale_x_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 6.1e+06)) +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 1e+07)) +
  ggtitle("Inland Lakes") + scale_fill_manual(values = lake_colors) +
  labs(y = "LNA Cell Density (cells/mL)", x = "HNA Cell Density (cells/mL)") +
  annotate("text", x=5e+06, y=0.8e+06, label=lm_inlaNA_corr_stats, parse = TRUE, color = "black", size = 3) +
  mytheme

### MICHIGAN ONLY ANALYSIS
# 1. Run the linear model 
lm_michNA_corr <- lm(LNA.cells ~ HNA.cells, data = mich_all_df)

# Without the Lake Michigan outlier!
summary(lm(LNA.cells ~ HNA.cells, data = filter(mich_all_df, Sample_16S != "M15S2F515")))

## 
## Call:
## lm(formula = LNA.cells ~ HNA.cells, data = filter(mich_all_df, 
##     Sample_16S != "M15S2F515"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -599787 -239620  -95094  241831  987737 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 5.832e+05  9.793e+04   5.956 3.37e-07 ***
## HNA.cells   1.200e+00  1.797e-01   6.678 2.78e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 354600 on 46 degrees of freedom
## Multiple R-squared:  0.4922, Adjusted R-squared:  0.4812 
## F-statistic: 44.59 on 1 and 46 DF,  p-value: 2.778e-08

## 2. Extract the R2 and p-value from the linear model: 
lm_michNA_corr_stats <- paste("atop(R^2 ==", round(summary(lm_michNA_corr)$adj.r.squared, digits = 2), ",",
                              "p ==", round(unname(summary(lm_michNA_corr)$coefficients[,4][2]), digits = 11), ")")

mich_corr_plot <- ggplot(mich_all_df, aes(x = HNA.cells, y = LNA.cells)) + 
  geom_errorbarh(aes(xmin = HNA.cells - HNA.sd, xmax = HNA.cells + HNA.sd), color = "grey") + 
  geom_errorbar(aes(ymin = LNA.cells - LNA.sd, max = LNA.cells + LNA.sd), color = "grey") + 
  geom_point(size = 3, shape = 22, aes(fill = Lake)) + 
  geom_smooth(method = "lm", color = "black") + 
  scale_x_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 6.1e+06)) +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 1e+07)) +
  ggtitle("Lake Michigan") + scale_fill_manual(values = lake_colors) +
  labs(y = "LNA Cell Density (cells/mL)", x = "HNA Cell Density (cells/mL)") +
  annotate("text", x=5e+06, y=0.8e+06, label=lm_michNA_corr_stats, parse = TRUE, color = "black", size = 3) +
  mytheme

plot_grid(mich_corr_plot, inla_corr_plot, musk_corr_plot, 
          labels = c("A", "B", "C"), nrow = 1, ncol = 3)

# Which sample is the outlier? And how does it compare from the other top 3 samples?
mich_all_df %>% arrange(-Total.cells) %>% head(n=3)

##          samples Total.cells HNA.cells LNA.cells Total.count.sd   HNA.sd   LNA.sd Platform     Lake        Sample_16S Season Month Year Fraction Site   Depth Total_Sequences norep_filter_name
## 1 M15S2-515-DMSO     6426699 3182154.2   3244545       80936.74 42003.28 58941.66   Accuri Michigan         M15S2F515 Spring   May 2015     Free MM15 Surface           14860          M15SF515
## 2 MM15-D-D_April     3019947 1329590.7   1690357       35081.79 25818.56 25525.35   Accuri Michigan Sp13.BD.MM15.DD.1 Spring April 2013     Free MM15    Deep           35392          Sp13BD.M
## 3  MM15-S-N_July     2963173  632722.9   2330450       37604.70 16918.47 27240.06   Accuri Michigan Su13.BD.MM15.SN.1 Summer  July 2013     Free MM15 Surface           35065          Su13BD.M

Figure 1B & 1D: Fraction HNA vs Productivity

## Plot the fraction of HNA
fmusk <- muskegon %>% 
  mutate(fHNA = HNA.cells/Total.cells,
         fLNA = LNA.cells/Total.cells)

lm_fHNA <- lm(tot_bacprod ~ fHNA, data = fmusk)

## 2. Extract the R2 and p-value from the linear model: 
lm_fHNA_stats <- paste("atop(R^2 ==", round(summary(lm_fHNA)$adj.r.squared, digits = 2), ",",
                      "p ==", round(unname(summary(lm_fHNA)$coefficients[,4][2]), digits = 3), ")")


fHNA_vs_prod <- ggplot(fmusk, aes(x = fHNA, y = tot_bacprod)) + 
  geom_errorbar(aes(ymin = tot_bacprod - SD_tot_bacprod, max = tot_bacprod + SD_tot_bacprod), color = "grey") + 
  geom_point(size = 3, aes(shape = Lake), fill = "deepskyblue4") + 
  geom_smooth(method = "lm", linetype = "longdash", color = "deepskyblue4") + 
  scale_x_continuous(limits = c(0.15, 0.9), breaks = seq(0.2, 0.9, by = 0.2)) + 
  ylab("Bacterial Production") + xlab("Fraction HNA") +
  scale_shape_manual(values = lake_shapes) + 
  annotate("text", x= 0.22, y=60, label=lm_fHNA_stats, parse = TRUE, color = "black", size = 3) +
  mytheme + theme(legend.position = "none")


### LNA Fraction
## Plot the fraction of HNA
lm_fLNA <- lm(tot_bacprod ~ fLNA, data = fmusk)

## 2. Extract the R2 and p-value from the linear model: 
lm_fLNA_stats <- paste("atop(R^2 ==", round(summary(lm_fLNA)$adj.r.squared, digits = 2), ",",
                      "p ==", round(unname(summary(lm_fLNA)$coefficients[,4][2]), digits = 3), ")")


fLNA_vs_prod <- ggplot(fmusk, aes(x = fLNA, y = tot_bacprod)) + 
  geom_errorbar(aes(ymin = tot_bacprod - SD_tot_bacprod, max = tot_bacprod + SD_tot_bacprod), color = "grey") + 
  geom_point(size = 3, aes(shape = Lake), fill = "darkgoldenrod1") + 
  geom_smooth(method = "lm", color = "darkgoldenrod1", fill = "darkgoldenrod1",  linetype = "longdash") + 
  scale_x_continuous(limits = c(0.15, 0.9), breaks = seq(0.2, 0.9, by = 0.2)) + 
  ylab("Bacterial Production") + xlab("Fraction LNA") +
  scale_shape_manual(values = lake_shapes) + 
  annotate("text", x= 0.22, y=60, label=lm_fLNA_stats, parse = TRUE, color = "black", size = 3) +
  mytheme + theme(legend.position = "none")

plot_grid(HNA_vs_prod + theme(legend.position = "none"), 
          fHNA_vs_prod,
          LNA_vs_prod, fLNA_vs_prod,
          labels = c("A", "B", "C", "D"), ncol = 2, nrow = 2,
          align = "h")

Figure 4: Heatmap

Load RL Score data

# Read in Data
dfHNA_musk_scores <- read.csv("FS_final/Muskegon_fs_scores_HNA_5seq10.csv") %>%
  dplyr::filter(RL.score > 0.09) %>%
  mutate(Lake = "Muskegon")%>%
  rename(OTU = X, RL.score.HNA = RL.score) %>%
  dplyr::select(OTU, RL.score.HNA)

dfLNA_musk_scores <- read.csv("FS_final/Muskegon_fs_scores_LNA_5seq10.csv") %>%
  dplyr::filter(RL.score > 0.09) %>%
  mutate(Lake = "Muskegon") %>%
  rename(OTU = X, RL.score.LNA = RL.score) %>%
  dplyr::select(OTU, RL.score.LNA)

dfHNA_inland_scores <- read.csv("FS_final/Inland_fs_scores_HNA_5seq10.csv") %>%
  dplyr::filter(RL.score > 0.13) %>%
  mutate(Lake = "Inland")%>%
  rename(OTU = X, RL.score.HNA = RL.score) %>%
  dplyr::select(OTU, RL.score.HNA)

dfLNA_inland_scores <- read.csv("FS_final/Inland_fs_scores_LNA_5seq10.csv") %>%
  dplyr::filter(RL.score > 0.108) %>%
  mutate(Lake = "Inland") %>%
  rename(OTU = X, RL.score.LNA = RL.score) %>%
  dplyr::select(OTU, RL.score.LNA)

dfHNA_michigan_scores <- read.csv("FS_final/Michigan_fs_scores_HNA_5seq10.csv") %>%
  dplyr::filter(RL.score > 0.248) %>%
  mutate(Lake = "Michigan")%>%
  rename(OTU = X, RL.score.HNA = RL.score) %>%
  dplyr::select(OTU, RL.score.HNA)

dfLNA_michigan_scores <- read.csv("FS_final/Michigan_fs_scores_LNA_5seq10.csv") %>%
  dplyr::filter(RL.score > 0.286) %>%
  mutate(Lake = "Michigan") %>%
  rename(OTU = X, RL.score.LNA = RL.score) %>%
  dplyr::select(OTU, RL.score.LNA)

# Combine HNA and LNA datasets together
muskegon_df_scores <- full_join(dfHNA_musk_scores, dfLNA_musk_scores, by = "OTU") %>%
  rename(Muskegon_HNA = RL.score.HNA, Muskegon_LNA = RL.score.LNA) # Rename the column to have the lake and FCM
inland_df_scores <- full_join(dfHNA_inland_scores, dfLNA_inland_scores, by = "OTU") %>%
  rename(Inland_HNA = RL.score.HNA, Inland_LNA = RL.score.LNA) # Rename the column to have the lake and FCM
michigan_df_scores <- full_join(dfHNA_michigan_scores, dfLNA_michigan_scores, by = "OTU") %>%
  rename(Michigan_HNA = RL.score.HNA, Michigan_LNA = RL.score.LNA) # Rename the column to have the lake and FCM

# Combine into a full dataset
df_RLscores <- 
  full_join(muskegon_df_scores, inland_df_scores, by = "OTU") %>%
  full_join(michigan_df_scores, by = "OTU") 
  
matrix_RLscores <- df_RLscores %>%
  tibble::column_to_rownames(var = "OTU") %>%
  as.matrix()

# Replace the NA values with 0 
matrix_RLscores[is.na(matrix_RLscores)] <- 0

Plot figure 4: RL Scores

breakers <- seq(min(matrix_RLscores, na.rm = T), max(matrix_RLscores, na.rm = T), length.out = 21)
my_palette <- colorRampPalette(c("white", "red")) (n=20)

# Set the colors of the different columns going in
colz <- c("#FF933F", "#FF933F", "#EC4863", "#EC4863", "#5C2849", "#5C2849")

heatmap.2(matrix_RLscores, 
          distfun = function(x) dist(x, method = "euclidean"),
          hclustfun = function(x) hclust(x,method = "complete"),
          col = my_palette, breaks = breakers, trace = "none",
          key = TRUE, symkey = FALSE, density.info = "none", srtCol = 30,
          margins=c(5.5,7), cexRow = 1.25,cexCol = 1.25,
          key.xlab = "RL Score",ColSideColors = colz,
          lhei = c(1,9))

Top 10 from Fig 4 Scores

# Extract the top 10 OTUs 
mich_top10_OTUs <- michigan_df_scores %>%
  top_n(., 10, Michigan_HNA) %>%
  dplyr::select(OTU)

inland_top10_HNA_OTUs <- inland_df_scores %>%
  dplyr::select(OTU, Inland_HNA) %>%
  top_n(., 10, Inland_HNA) %>%
  dplyr::select(OTU)

inland_top10_LNA_OTUs <- inland_df_scores %>%
  dplyr::select(OTU, Inland_LNA) %>%
  top_n(., 10, Inland_LNA) %>%
  dplyr::select(OTU) 
  
musk_top10_HNA_OTUs <- muskegon_df_scores %>%
  dplyr::select(OTU, Muskegon_HNA) %>%
  top_n(., 10, Muskegon_HNA) %>%
  dplyr::select(OTU)

musk_top10_LNA_OTUs <- muskegon_df_scores %>%
  dplyr::select(OTU, Muskegon_LNA) %>%
  top_n(., 10, Muskegon_LNA) %>%
  dplyr::select(OTU)

# The above code pulls out the OTUs and we'll match this with the data from the full dataset
df_OTUs <- bind_rows(mich_top10_OTUs, inland_top10_HNA_OTUs, inland_top10_LNA_OTUs, musk_top10_HNA_OTUs, musk_top10_LNA_OTUs) %>%
  unique()

# Do a filtering join 
df_top10 <- df_OTUs %>%
  left_join(df_RLscores, by = "OTU")

matrix_top10 <- df_top10 %>%
  # Remove the zeros in the OTU names 
  mutate(OTU = str_replace(OTU, "00", "")) %>%
  tibble::column_to_rownames(var = "OTU") %>%
  as.matrix()

# The heatmap won't work with NAs 
matrix_top10[is.na(matrix_top10)] <- 0

#png("Analysis_Figures/clustering_top10.png", units = "in", width = 6, height = 8, res = 300)

heatmap.2(matrix_top10, 
        distfun = function(x) dist(x, method = "euclidean"),
        hclustfun = function(x) hclust(x,method = "complete"),
        col = my_palette, breaks = breakers, trace = "none",
        key = TRUE, symkey = FALSE, density.info = "none", srtCol = 30,
        margins=c(5.5,7), cexRow = 1.25,cexCol = 1.25,
        key.xlab = "RL Score",ColSideColors = colz,
        lhei = c(1.5,9), key.title=NA)

#dev.off()

# WITHOUT the clustering!
heatmap.2(matrix_top10,
        dendrogram = "none", Rowv = FALSE, Colv = FALSE,
        col = my_palette, breaks = breakers, trace = "none",
        key = TRUE, symkey = FALSE, density.info = "none", srtCol = 30,
        margins=c(5.5,7), cexRow = 1.25,cexCol = 1.25,
        key.xlab = "RL Score",ColSideColors = colz,
        lhei = c(1,9), #lmat = rbind(1,2),
        key.title=NA)

## Error in plot.new(): figure margins too large

# Only cluster the columns
heatmap.2(matrix_top10, 
        distfun = function(x) dist(x, method = "euclidean"),
        hclustfun = function(x) hclust(x,method = "complete"),
        # Do not cluster based on rows
        dendrogram = "column",Rowv = FALSE, 
        col = my_palette, breaks = breakers, trace = "none",
        key = TRUE, symkey = FALSE, density.info = "none", srtCol = 30,
        margins=c(5.5,7), cexRow = 1.25,cexCol = 1.25,
        key.xlab = "RL Score",ColSideColors = colz,
        lhei = c(1.5,9), key.title=NA)

Figure 5: Phylogenetic Tree

#### PHYLOGENETIC ANALYSIS
load("data/phyloseq.RData")
physeq.otu

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 13370 taxa and 859 samples ]
## tax_table()   Taxonomy Table:    [ 13370 taxa by 7 taxonomic ranks ]

# With all of the OTUs that pass the RL score threshold
otu_scores_df <- matrix_RLscores %>%
  as.data.frame() %>%
  tibble::rownames_to_column("OTU")

# Subset the vector with names of the OTUs
vector_of_otus <- as.vector(otu_scores_df$OTU)

# Write to a file
#write(vector_of_otus, file = "data/fasttree/Figure5/OTUnames_RLscores_258.txt",
#      ncolumns = 1, append = FALSE, sep = "\n")

physeq <- physeq.otu %>%
  subset_taxa(., taxa_names(physeq.otu) %in% vector_of_otus)  

# Which OTU is missing?
setdiff(sort(vector_of_otus), sort(taxa_names(physeq)))

## character(0)

setdiff(sort(taxa_names(physeq)), sort(vector_of_otus))

## character(0)

######################################### FASTTREE
fast_tree <- read.tree(file="data/fasttree/Figure5/newick_tree_OTUs258.tre")

## Error in file(file, "r"): cannot open the connection

fast_tree_tip_order <- data.frame(fast_tree$tip.label) %>%
  rename(OTU = fast_tree.tip.label)

## Error in data.frame(fast_tree$tip.label): object 'fast_tree' not found

# Make sure the OTUs match 
stopifnot(sort(vector_of_otus)==sort(as.character(fast_tree_tip_order$OTU)))

## Error in sort(vector_of_otus) == sort(as.character(fast_tree_tip_order$OTU)): object 'fast_tree_tip_order' not found

fasttree_physeq <- merge_phyloseq(physeq, phy_tree(fast_tree))

## Error in phy_tree(fast_tree): object 'fast_tree' not found

# Fix the taxonomy names 
colnames(tax_table(fasttree_physeq)) <- c("Kingdom","Phylum","Class","Order","Family","Genus","Species")

## Error in colnames(tax_table(fasttree_physeq)) <- c("Kingdom", "Phylum", : object 'fasttree_physeq' not found

###################################################################### ADD THE PROTEOBACTERIA TO THE PHYLA
phy <- data.frame(tax_table(fasttree_physeq))

## Error in tax_table(fasttree_physeq): object 'fasttree_physeq' not found

Phylum <- as.character(phy$Phylum)

## Error in eval(expr, envir, enclos): object 'phy' not found

Class <- as.character(phy$Class)

## Error in eval(expr, envir, enclos): object 'phy' not found

for  (i in 1:length(Phylum)){ 
  if (Phylum[i] == "Proteobacteria"){
    Phylum[i] <- Class[i]  } }

## Error in eval(expr, envir, enclos): object 'Phylum' not found

phy$Phylum <- Phylum # Add the new phylum level data back to phy

## Error in eval(expr, envir, enclos): object 'Phylum' not found

t <- tax_table(as.matrix(phy))

## Error in as.matrix(phy): object 'phy' not found

tax_table(fasttree_physeq) <- t

## Error in tax_table(fasttree_physeq) <- t: object 'fasttree_physeq' not found

fasttree_tax <- data.frame(tax_table(fasttree_physeq)) %>%
  tibble::rownames_to_column(var = "OTU")

## Error in tax_table(fasttree_physeq): object 'fasttree_physeq' not found

## Let's root the tree
is.rooted(fast_tree)

## Error in is.rooted(fast_tree): object 'fast_tree' not found

rooted_tree <- root(fast_tree, outgroup = "Otu001533", resolve.root = TRUE)

## Error in root(fast_tree, outgroup = "Otu001533", resolve.root = TRUE): object 'fast_tree' not found

is.rooted(rooted_tree) # Sanity Check

## Error in is.rooted(rooted_tree): object 'rooted_tree' not found

# Need to make a manual file based off of FIgure 4 - it starts with OTU names
# write.csv(df_RLscores, file = "data/fasttree/Figure5/OTUs258_MANUAL.csv", row.names = FALSE)
# Next I manually filled in the fcm_type and lake columns :/ 

phyfcm_fasttree_df <- read.csv("data/fasttree/Figure5/OTUs258_MANUAL.csv") %>%
  left_join(., fasttree_tax, by = "OTU") %>%
  tibble::column_to_rownames("OTU") %>%
  dplyr::select(Phylum, FCM)

## Error in file(file, "rt"): cannot open the connection

rooted_tree_plot <- 
  ggplot(rooted_tree, aes(x, y)) + geom_tree() + theme_tree() +
  geom_tiplab(size=3, align=TRUE, linesize=.5) #+

## Error in ggplot(rooted_tree, aes(x, y)): object 'rooted_tree' not found

  #geom_nodelab(vjust=-.5, size=3) 

gheatmap(rooted_tree_plot, phyfcm_fasttree_df, offset = 0.1, width=0.3, font.size=3, colnames_angle=0, hjust=0.5) +
  scale_fill_manual(values = phylum_colors, 
                    breaks = c("Acidobacteria","Actinobacteria", "Alphaproteobacteria", "Bacteria_unclassified", "Bacteroidetes", "Betaproteobacteria", 
                               "Chlorobi", "Chloroflexi","Cyanobacteria","Deltaproteobacteria", "Epsilonproteobacteria", "Firmicutes", 
                               "Gammaproteobacteria","Gemmatimonadetes","Omnitrophica","Parcubacteria","Planctomycetes",
                               "Proteobacteria_unclassified", "TA18","unknown_unclassified", "Verrucomicrobia", 
                               # FCM Groups
                               "Both", "HNA", "LNA")) +
  guides(fill=guide_legend(ncol=1))

## Error in gheatmap(rooted_tree_plot, phyfcm_fasttree_df, offset = 0.1, : could not find function "gheatmap"

Prepare files for iTOL - Figure 5

OTU258_and_phylum <- read.csv("data/fasttree/Figure5/OTUs258_MANUAL.csv") %>%
  left_join(., fasttree_tax, by = "OTU") %>%
  dplyr::select(OTU, Phylum) 

OTU258_and_FCM <- read.csv("data/fasttree/Figure5/OTUs258_MANUAL.csv") %>%
  left_join(., fasttree_tax, by = "OTU") %>%
  dplyr::select(OTU, FCM) %>%
  rename(Phylum = FCM) # To extract from phylum_colors

itol_colors <- 
  phylum_colors %>%
  as.list() %>%
  as.matrix() %>%
  as.data.frame() %>%
  tibble::rownames_to_column("Phylum") %>%
  rename(color_name = V1) %>%
  mutate(hex = col2hex(color_name)) %>%
  dplyr::select(hex, Phylum)

itol_phylum_df <- left_join(OTU258_and_phylum, itol_colors, by = "Phylum") %>%
  mutate(itol_label = "range") %>%
  dplyr::select(OTU, itol_label, hex, Phylum)

#write.csv(itol_phylum_df, file = "data/fasttree/Figure5/itol_df.csv", row.names = FALSE)

#itol_phylum_df %>%
#  dplyr::select(OTU, hex, Phylum) %>%
#  write.csv(file = "data/fasttree/Figure5/itol_PHYLUM_df.csv", row.names = FALSE)

itol_FCM_df <- left_join(OTU258_and_FCM, itol_colors, by = "Phylum") %>%
  rename(FCM = Phylum) %>% 
  dplyr::select(OTU, hex, FCM)
#write.csv(itol_FCM_df, file = "data/fasttree/Figure5/itol_FCM_df.csv", row.names = FALSE)


# FOR RL SCORE in iTOL
# Write a file for HNA
#read.csv("data/fasttree/Figure5/OTUs258_MANUAL.csv") %>%
#  dplyr::select(OTU, Muskegon_HNA, Inland_HNA, Michigan_HNA) %>%
#  write.csv(file = "data/fasttree/Figure5/HNA_RLscores.csv", row.names = FALSE)
# Write a file for LNA
#read.csv("data/fasttree/Figure5/OTUs258_MANUAL.csv") %>%
#  dplyr::select(OTU, Muskegon_LNA, Inland_LNA, Michigan_LNA) %>%
#  write.csv(file = "data/fasttree/Figure5/LNA_RLscores.csv", row.names = FALSE)

read.csv("data/fasttree/Figure5/LNA_RLscores.csv") %>%
  dplyr::select(OTU, RL_score) %>%
  rename(LNA_score = RL_score) %>%
#  write.csv(file = "data/fasttree/Figure5/iTOL_LNA_RLscores.csv", row.names = FALSE)

#read.csv("data/fasttree/Figure5/HNA_RLscores.csv") %>%
#  dplyr::select(OTU, RL_score) %>%
#  rename(HNA_score = RL_score) %>%  
#  write.csv(file = "data/fasttree/Figure5/iTOL_HNA_RLscores.csv", row.names = FALSE)

LNA_lake <- read.csv("data/fasttree/Figure5/LNA_RLscores.csv") %>%
  dplyr::select(OTU, lake) %>%
  mutate(lake = as.character(lake)) %>%
  rename(LNA_Lake = lake)
  
HNA_lake <- read.csv("data/fasttree/Figure5/HNA_RLscores.csv") %>%
  dplyr::select(OTU, lake) %>%
  mutate(lake = as.character(lake)) %>%
  rename(HNA_Lake = lake)

OTU_preferred_lake <- HNA_lake %>%
  left_join(LNA_lake, by = "OTU") %>%
  mutate(lake = ifelse(is.na(LNA_Lake), HNA_Lake,
                       ifelse(is.na(HNA_Lake), LNA_Lake,
                              ifelse(HNA_Lake == LNA_Lake, HNA_Lake,
                                     "Mismatch")))) %>%
  dplyr::select(OTU, lake)

itol_lake_colors <- c(
  "Muskegon" = "#FF933F",  
  "Michigan" =  "#EC4863", 
  "Inland" =  "#5C2849",
  "All" = "#8b6914",
  "Muskegon-Inland" = "#FFD3B5",
  "Mismatch" = "#CD0000")  

lake_col_df <- itol_lake_colors %>%
  as.list() %>%
  as.matrix() %>%
  as.data.frame() %>%
  tibble::rownames_to_column("lake") %>%
  rename(color_name = V1) %>%
  mutate(color_name = as.character(color_name))

#OTU_preferred_lake %>%
#  left_join(lake_col_df, by = "lake") %>%
#  dplyr::select(OTU, color_name, lake) %>%
#  write.csv(file = "data/fasttree/Figure5/iTOL_Lake.csv", row.names = FALSE)

How many OTUs per system?

# Number of OTUs per system and FCM group
num_OTUs <- c(nrow(dfHNA_musk_scores), nrow(dfLNA_musk_scores), nrow(dfHNA_inland_scores), nrow(dfLNA_inland_scores), 
              nrow(dfHNA_michigan_scores), nrow(dfLNA_michigan_scores))
lake_names <- as.vector(c("Muskegon", "Muskegon", "Inland", "Inland", "Michigan", "Michigan"))
fcm_names <- as.vector(c("HNA", "LNA", "HNA", "LNA", "HNA", "LNA"))

summary_OTU_df <- as.data.frame(num_OTUs) %>%
  mutate(lake = c("Muskegon", "Muskegon", "Inland", "Inland", "Michigan", "Michigan"),
         fcm = c("HNA", "LNA", "HNA", "LNA", "HNA", "LNA"))
summary_OTU_df

##   num_OTUs     lake fcm
## 1       99 Muskegon HNA
## 2       98 Muskegon LNA
## 3       55   Inland HNA
## 4       79   Inland LNA
## 5       12 Michigan HNA
## 6        9 Michigan LNA

ggplot(summary_OTU_df, aes(x = fcm, y = num_OTUs, fill = fcm)) + 
  geom_bar(stat = "identity", color = "black") +
  scale_fill_manual(values = fcm_colors) + 
  scale_y_continuous(expand = c(0,0), limits = c(0, 125)) +
  labs(x = "Flow Cytometry Group", y = "Number of Selected OTUs") + 
  facet_grid(.~lake) + mytheme +
  theme(legend.title = element_blank(), 
        legend.key.width = unit(0.5, "cm"),
        legend.key.height = unit(0.5, "cm"))

lake_OTU_nums <- summary_OTU_df %>%
  group_by(lake) %>%
  summarise(sum(num_OTUs))
lake_OTU_nums

## # A tibble: 3 x 2
##   lake     `sum(num_OTUs)`
##   <chr>              <int>
## 1 Inland               134
## 2 Michigan              21
## 3 Muskegon             197

ggplot(lake_OTU_nums, aes(x = lake, y = `sum(num_OTUs)`, fill = lake)) + 
  geom_bar(stat = "identity", color = "black") + 
  labs(x = "Lake", y = "Total Number of Selected OTUs") + 
  scale_fill_manual(values = lake_colors) + 
  scale_y_continuous(expand = c(0,0), limits = c(0, 200)) +
  mytheme +
  theme(legend.title = element_blank(), 
        legend.key.width = unit(0.5, "cm"),
        legend.key.height = unit(0.5, "cm"))

Number of OTUs per phylum

# How many selected OTUs in total?
nrow(phyfcm_fasttree_df)

## Error in nrow(phyfcm_fasttree_df): object 'phyfcm_fasttree_df' not found

# How many selected OTUs are in each Phylum? 
phyfcm_fasttree_df %>%
  mutate(OTU = row.names(.)) %>%
  count(Phylum) %>%
  datatable()

## Error in eval(lhs, parent, parent): object 'phyfcm_fasttree_df' not found

# How many selected OTUs within each phylum are HNA, LNA, or Both? 
phyfcm_fasttree_df %>%
  mutate(OTU = row.names(.)) %>%
  group_by(Phylum) %>%
  count(FCM) %>%
  datatable()

## Error in eval(lhs, parent, parent): object 'phyfcm_fasttree_df' not found

Specific Bacterial Phyla

# Bacteroidetes 
bacteroidetes <- phyfcm_fasttree_df %>%
  mutate(OTU = row.names(.)) %>%
  dplyr::filter(Phylum == "Bacteroidetes") %>%
  count(FCM) %>% 
  mutate(percent = n/sum(n)); bacteroidetes

## Error in eval(lhs, parent, parent): object 'phyfcm_fasttree_df' not found

## Error in eval(expr, envir, enclos): object 'bacteroidetes' not found

# Betaproteobacteria 
betaproteobacteria <- phyfcm_fasttree_df %>%
  mutate(OTU = row.names(.)) %>%
  dplyr::filter(Phylum == "Betaproteobacteria") %>%
  count(FCM) %>% 
  mutate(percent = n/sum(n)); betaproteobacteria

## Error in eval(lhs, parent, parent): object 'phyfcm_fasttree_df' not found

## Error in eval(expr, envir, enclos): object 'betaproteobacteria' not found

# Alphaproteobacteria 
alphaproteobacteria <- phyfcm_fasttree_df %>%
  mutate(OTU = row.names(.)) %>%
  dplyr::filter(Phylum == "Alphaproteobacteria") %>%
  count(FCM) %>% 
  mutate(percent = n/sum(n)); alphaproteobacteria

## Error in eval(lhs, parent, parent): object 'phyfcm_fasttree_df' not found

## Error in eval(expr, envir, enclos): object 'alphaproteobacteria' not found

# Verrucomicrobia 
verrucomicrobia <- phyfcm_fasttree_df %>%
  mutate(OTU = row.names(.)) %>%
  dplyr::filter(Phylum == "Verrucomicrobia") %>%
  count(FCM) %>%
  mutate(percent = n/sum(n)); verrucomicrobia

## Error in eval(lhs, parent, parent): object 'phyfcm_fasttree_df' not found

## Error in eval(expr, envir, enclos): object 'verrucomicrobia' not found

# How many of the total OTUs were Bacteroidetes
top4_phyla <- sum(bacteroidetes$n)+sum(betaproteobacteria$n)+sum(alphaproteobacteria$n)+sum(verrucomicrobia$n)

## Error in eval(expr, envir, enclos): object 'bacteroidetes' not found

sum(top4_phyla)/nrow(phyfcm_fasttree_df) # % of total OTUs selected

## Error in eval(expr, envir, enclos): object 'top4_phyla' not found

# Percent of total selected OTUs by each phylum
sum(bacteroidetes$n)/nrow(phyfcm_fasttree_df) # % of Bacteroidetes

## Error in eval(expr, envir, enclos): object 'bacteroidetes' not found

sum(betaproteobacteria$n)/nrow(phyfcm_fasttree_df) # % of Betaproteobacteria

## Error in eval(expr, envir, enclos): object 'betaproteobacteria' not found

sum(alphaproteobacteria$n)/nrow(phyfcm_fasttree_df) # % of Alphaproteobacteria

## Error in eval(expr, envir, enclos): object 'alphaproteobacteria' not found

sum(verrucomicrobia$n)/nrow(phyfcm_fasttree_df) # % of Verrucomicrobia

## Error in eval(expr, envir, enclos): object 'verrucomicrobia' not found

Phylogenetic Signal

Prepare data

# Load in the scores 
LNA_scores_df <- read.csv("data/fasttree/Figure5/LNA_RLscores.csv") %>%
  dplyr::select(OTU, RL_score) %>%
  rename(LNA_score = RL_score)

## Error in file(file, "rt"): cannot open the connection

HNA_scores_df <- read.csv("data/fasttree/Figure5/HNA_RLscores.csv") %>%
  dplyr::select(OTU, RL_score) %>%
  rename(HNA_score = RL_score)

## Error in file(file, "rt"): cannot open the connection

stopifnot(dim(LNA_scores_df) == dim(HNA_scores_df))

## Error in dim(LNA_scores_df) == dim(HNA_scores_df): object 'LNA_scores_df' not found

# Put them into one data frame
summarized_RL_scores <- HNA_scores_df %>%
  left_join(LNA_scores_df, by = "OTU")

## Error in eval(lhs, parent, parent): object 'HNA_scores_df' not found

# For simpliclity, make sure the vectors of OTUs match the OTU order in the phylogeny
summarized_RL_scores <- summarized_RL_scores[match(rooted_tree$tip.label, summarized_RL_scores$OTU),]

## Error in eval(expr, envir, enclos): object 'summarized_RL_scores' not found

stopifnot(rooted_tree$tip.label == summarized_RL_scores$OTU)

## Error in rooted_tree$tip.label == summarized_RL_scores$OTU: object 'rooted_tree' not found

# Load FCM data that is categorical
FCM_categorical_df <- read.csv("data/fasttree/Figure5/OTUs258_MANUAL.csv") %>%
   dplyr::select(OTU, FCM)

## Error in file(file, "rt"): cannot open the connection

FCM_categorical_df <- FCM_categorical_df[match(rooted_tree$tip.label, FCM_categorical_df$OTU),]

## Error in eval(expr, envir, enclos): object 'FCM_categorical_df' not found

stopifnot(rooted_tree$tip.label == FCM_categorical_df$OTU)

## Error in rooted_tree$tip.label == FCM_categorical_df$OTU: object 'rooted_tree' not found

####### TREEs for only HNA and LNA
# HNA: Subset all of the HNA OTUs
HNA_df_phydist <- summarized_RL_scores %>%
  select(OTU, HNA_score) %>%
  na.omit()

## Error in eval(lhs, parent, parent): object 'summarized_RL_scores' not found

HNAscores <- HNA_df_phydist$HNA_score

## Error in eval(expr, envir, enclos): object 'HNA_df_phydist' not found

names(HNAscores) <- HNA_df_phydist$OTU

## Error in eval(expr, envir, enclos): object 'HNA_df_phydist' not found

# Subset the larger tree to only have the HNA OTUs
HNA_rooted_tree <- ape::drop.tip(phy = rooted_tree, tip = rooted_tree$tip.label[-match(HNA_df_phydist$OTU, rooted_tree$tip.label)])

## Error in ape::drop.tip(phy = rooted_tree, tip = rooted_tree$tip.label[-match(HNA_df_phydist$OTU, : object 'rooted_tree' not found

# Make sure OTUs are in the same order for the test 
HNA_df_phydist <- HNA_df_phydist[match(HNA_rooted_tree$tip.label, HNA_df_phydist$OTU),]

## Error in eval(expr, envir, enclos): object 'HNA_df_phydist' not found

stopifnot(HNA_df_phydist$OTU == HNA_rooted_tree$tip.label)

## Error in HNA_df_phydist$OTU == HNA_rooted_tree$tip.label: object 'HNA_df_phydist' not found

stopifnot(names(HNAscores) == HNA_rooted_tree$tip.label)

## Error in names(HNAscores) == HNA_rooted_tree$tip.label: object 'HNAscores' not found

# LNA: Subset all of the HNA OTUs
LNA_df_phydist <- summarized_RL_scores %>%
  select(OTU, LNA_score) %>%
  na.omit()

## Error in eval(lhs, parent, parent): object 'summarized_RL_scores' not found

LNAscores <- LNA_df_phydist$LNA_score

## Error in eval(expr, envir, enclos): object 'LNA_df_phydist' not found

names(LNAscores) <- LNA_df_phydist$OTU

## Error in eval(expr, envir, enclos): object 'LNA_df_phydist' not found

# Subset the larger tree to only have the LNA OTUs
LNA_rooted_tree <- ape::drop.tip(phy = rooted_tree, tip = rooted_tree$tip.label[-match(LNA_df_phydist$OTU, rooted_tree$tip.label)])

## Error in ape::drop.tip(phy = rooted_tree, tip = rooted_tree$tip.label[-match(LNA_df_phydist$OTU, : object 'rooted_tree' not found

# Make sure OTUs are in the same order for the test 
LNA_df_phydist <- LNA_df_phydist[match(LNA_rooted_tree$tip.label, LNA_df_phydist$OTU),]

## Error in eval(expr, envir, enclos): object 'LNA_df_phydist' not found

stopifnot(LNA_df_phydist$OTU == LNA_rooted_tree$tip.label)

## Error in LNA_df_phydist$OTU == LNA_rooted_tree$tip.label: object 'LNA_df_phydist' not found

stopifnot(names(LNAscores) == LNA_rooted_tree$tip.label)

## Error in names(LNAscores) == LNA_rooted_tree$tip.label: object 'LNAscores' not found

Pagel’s Lambda

Discrete Character Evolution

# Discrete models: From Samantha Price & Roi Holzman: http://www.eve.ucdavis.edu/~wainwrightlab/Roi/Site/Teaching_files/Intro2Phylo_S4.R
discrete_FCM <- FCM_categorical_df$FCM

## Error in eval(expr, envir, enclos): object 'FCM_categorical_df' not found

names(discrete_FCM) <- FCM_categorical_df$OTU

## Error in eval(expr, envir, enclos): object 'FCM_categorical_df' not found

# Discrete Pagel's Lambda Model
discrete_lambda_fit <- fitDiscrete(rooted_tree, discrete_FCM, treeTransform = "lambda")

## Error in treedata(phy, dat): object 'discrete_FCM' not found

# To test for significant phylogenetic signal, compare the negative log likelihood when there is no signal with rescale
lambda0_tree <- rescale(rooted_tree, "lambda", 0) # A big polytomy tree

## Error in rescale(rooted_tree, "lambda", 0): object 'rooted_tree' not found

lambda0_fit <- fitDiscrete(lambda0_tree, discrete_FCM)

## Error in treedata(phy, dat): object 'discrete_FCM' not found

# Is there a significant difference? NO 
# Using a likelihood ratio test
pchisq(2*(lambda0_fit$opt$lnL-discrete_lambda_fit$opt$lnL), 1, lower.tail=FALSE)

## Error in pchisq(2 * (lambda0_fit$opt$lnL - discrete_lambda_fit$opt$lnL), : object 'lambda0_fit' not found

Continuous Character Evolution

From Roi Holzman & Samantha Price:

Pagel’s lambda is a tree transformation that assesses the degree of phylogenetic signal within the trait by multiplying the internal branches of the tree by values between 0 and 1 (the lambda parameter).
- lambda=1 indicates complete phylogenetic patterning as it returns the branch lengths untransformed.
- lambda=0 indicates no patterning as it leads the tree collapsing to a single large polytomy.

# # We chose HNA and LNA RL Score as the trait we are testing for phylogenetic signal, with 999 randomizations:
# Does the HNA RL Score have phylogenetic signal?
phylosig(HNA_rooted_tree, HNAscores, method = "lambda", test = TRUE, nsim = 999)

## Error in phylosig(HNA_rooted_tree, HNAscores, method = "lambda", test = TRUE, : object 'HNA_rooted_tree' not found

# Does the LNA RL Score have phylogenetic signal?
phylosig(LNA_rooted_tree, LNAscores, method = "lambda", test = TRUE, nsim = 999)

## Error in phylosig(LNA_rooted_tree, LNAscores, method = "lambda", test = TRUE, : object 'LNA_rooted_tree' not found

Blomberg’s K

# HNA
phylosig(tree = rooted_tree, x = HNAscores, method = "K", test = TRUE, nsim = 999)

## Error in phylosig(tree = rooted_tree, x = HNAscores, method = "K", test = TRUE, : object 'rooted_tree' not found

# LNA
phylosig(tree = rooted_tree, x = LNAscores, method = "K", test = TRUE, nsim = 999)

## Error in phylosig(tree = rooted_tree, x = LNAscores, method = "K", test = TRUE, : object 'rooted_tree' not found

Moran’s I

# HNA: Calculate the pairwise distances of the OTUs in the HNA phylogeny
HNA_rooted_phy_dist <- cophenetic(HNA_rooted_tree)

## Error in cophenetic(HNA_rooted_tree): object 'HNA_rooted_tree' not found

# Do HNA OTUs have phylogenetic autocorrelation with the Moran's I test?
moran_HNA_test <- abouheif.moran(x = HNA_df_phydist$HNA_score, W = HNA_rooted_phy_dist, method = "Abouheif", nrepet = 999)

## Error in abouheif.moran(x = HNA_df_phydist$HNA_score, W = HNA_rooted_phy_dist, : object 'HNA_rooted_phy_dist' not found

moran_HNA_test

## Error in eval(expr, envir, enclos): object 'moran_HNA_test' not found

# LNA: Calculate the pairwise distances of the OTUs in the HNA phylogeny
LNA_rooted_phy_dist <- cophenetic(LNA_rooted_tree)

## Error in cophenetic(LNA_rooted_tree): object 'LNA_rooted_tree' not found

# Do LNA OTUs have phylogenetic autocorrelation with the Moran's I test?
moran_LNA_test <- abouheif.moran(x = LNA_df_phydist$LNA_score, W = LNA_rooted_phy_dist, method = "Abouheif", nrepet = 999)

## Error in abouheif.moran(x = LNA_df_phydist$LNA_score, W = LNA_rooted_phy_dist, : object 'LNA_rooted_phy_dist' not found

moran_LNA_test

## Error in eval(expr, envir, enclos): object 'moran_LNA_test' not found

Abouheif’s Cmean

# HNA: Perform an Abouheif moran test using Monte Carlo simulations (default is 999 randomizations)
abouheif_HNA_test <- abouheif.moran(x = HNA_df_phydist$HNA_score, W = HNA_rooted_phy_dist, method = "oriAbouheif", nrepet = 999)

## Error in abouheif.moran(x = HNA_df_phydist$HNA_score, W = HNA_rooted_phy_dist, : object 'HNA_rooted_phy_dist' not found

abouheif_HNA_test

## Error in eval(expr, envir, enclos): object 'abouheif_HNA_test' not found

# LNA: Perform an Abouheifmoran test using Monte Carlo simulations (default is 999 randomizations)
abouheif_LNA_test <- abouheif.moran(x = LNA_df_phydist$LNA_score, W = LNA_rooted_phy_dist, method = "oriAbouheif", nrepet = 999)

## Error in abouheif.moran(x = LNA_df_phydist$LNA_score, W = LNA_rooted_phy_dist, : object 'LNA_rooted_phy_dist' not found

abouheif_LNA_test

## Error in eval(expr, envir, enclos): object 'abouheif_LNA_test' not found

Session Information

devtools::session_info() # This will include session info with all R package version information

## ─ Session info ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
##  setting  value                       
##  version  R version 3.5.3 (2019-03-11)
##  os       macOS High Sierra 10.13.6   
##  system   x86_64, darwin15.6.0        
##  ui       X11                         
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  ctype    en_US.UTF-8                 
##  tz       America/Chicago             
##  date     2019-04-17                  
## 
## ─ Packages ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
##  package           * version   date       lib source                                
##  ade4              * 1.7-13    2018-08-31 [1] CRAN (R 3.5.0)                        
##  adegenet            2.1.1     2018-02-02 [1] CRAN (R 3.5.0)                        
##  adephylo          * 1.1-11    2017-12-18 [1] CRAN (R 3.5.0)                        
##  animation           2.6       2018-12-11 [1] CRAN (R 3.5.0)                        
##  ape               * 5.2       2018-09-24 [1] CRAN (R 3.5.0)                        
##  assertthat          0.2.0     2017-04-11 [1] CRAN (R 3.5.0)                        
##  backports           1.1.3     2018-12-14 [1] CRAN (R 3.5.0)                        
##  base64enc           0.1-3     2015-07-28 [1] CRAN (R 3.5.0)                        
##  bindr               0.1.1     2018-03-13 [1] CRAN (R 3.5.0)                        
##  bindrcpp          * 0.2.2     2018-03-29 [1] CRAN (R 3.5.0)                        
##  Biobase             2.41.2    2018-07-18 [1] Bioconductor                          
##  BiocGenerics        0.27.1    2018-06-17 [1] Bioconductor                          
##  biomformat          1.7.0     2018-08-27 [1] Github (joey711/biomformat@4e13f6f)   
##  Biostrings          2.49.1    2018-08-04 [1] Bioconductor                          
##  bitops              1.0-6     2013-08-17 [1] CRAN (R 3.5.0)                        
##  boot                1.3-20    2017-08-06 [1] CRAN (R 3.5.3)                        
##  broom               0.5.1     2018-12-05 [1] CRAN (R 3.5.0)                        
##  callr               3.1.1     2018-12-21 [1] CRAN (R 3.5.0)                        
##  caTools             1.17.1.1  2018-07-20 [1] CRAN (R 3.5.0)                        
##  cellranger          1.1.0     2016-07-27 [1] CRAN (R 3.5.0)                        
##  cli                 1.0.1     2018-09-25 [1] CRAN (R 3.5.0)                        
##  cluster             2.0.7-1   2018-04-13 [1] CRAN (R 3.5.3)                        
##  clusterGeneration   1.3.4     2015-02-18 [1] CRAN (R 3.5.0)                        
##  coda                0.19-2    2018-10-08 [1] CRAN (R 3.5.0)                        
##  codetools           0.2-16    2018-12-24 [1] CRAN (R 3.5.3)                        
##  colorspace          1.4-0     2019-01-13 [1] CRAN (R 3.5.2)                        
##  combinat            0.0-8     2012-10-29 [1] CRAN (R 3.5.0)                        
##  cowplot           * 0.9.4     2019-01-08 [1] CRAN (R 3.5.2)                        
##  crayon              1.3.4     2017-09-16 [1] CRAN (R 3.5.0)                        
##  crosstalk           1.0.0     2016-12-21 [1] CRAN (R 3.5.0)                        
##  d3heatmap         * 0.6.1.2   2018-02-01 [1] CRAN (R 3.5.0)                        
##  data.table          1.12.0    2019-01-13 [1] CRAN (R 3.5.2)                        
##  deldir              0.1-16    2019-01-04 [1] CRAN (R 3.5.2)                        
##  desc                1.2.0     2018-05-01 [1] CRAN (R 3.5.0)                        
##  deSolve             1.21      2018-05-09 [1] CRAN (R 3.5.0)                        
##  devtools          * 2.0.1     2018-10-26 [1] CRAN (R 3.5.2)                        
##  digest              0.6.18    2018-10-10 [1] CRAN (R 3.5.0)                        
##  dplyr             * 0.7.8     2018-11-10 [1] CRAN (R 3.5.0)                        
##  DT                * 0.5       2018-11-05 [1] CRAN (R 3.5.0)                        
##  evaluate            0.12      2018-10-09 [1] CRAN (R 3.5.0)                        
##  expm                0.999-3   2018-09-22 [1] CRAN (R 3.5.0)                        
##  fansi               0.4.0     2018-10-05 [1] CRAN (R 3.5.0)                        
##  fastmatch           1.1-0     2017-01-28 [1] CRAN (R 3.5.0)                        
##  forcats           * 0.3.0     2018-02-19 [1] CRAN (R 3.5.0)                        
##  foreach             1.4.4     2017-12-12 [1] CRAN (R 3.5.0)                        
##  fs                  1.2.6     2018-08-23 [1] CRAN (R 3.5.0)                        
##  gdata               2.18.0    2017-06-06 [1] CRAN (R 3.5.0)                        
##  geiger            * 2.0.6.1   2019-01-16 [1] CRAN (R 3.5.2)                        
##  generics            0.0.2     2018-11-29 [1] CRAN (R 3.5.0)                        
##  ggplot2           * 3.1.0     2018-10-25 [1] CRAN (R 3.5.0)                        
##  glue                1.3.0     2018-07-17 [1] CRAN (R 3.5.0)                        
##  gmodels             2.18.1    2018-06-25 [1] CRAN (R 3.5.0)                        
##  gplots            * 3.0.1     2016-03-30 [1] CRAN (R 3.5.0)                        
##  gtable              0.2.0     2016-02-26 [1] CRAN (R 3.5.0)                        
##  gtools              3.8.1     2018-06-26 [1] CRAN (R 3.5.0)                        
##  haven               2.0.0     2018-11-22 [1] CRAN (R 3.5.0)                        
##  hms                 0.4.2     2018-03-10 [1] CRAN (R 3.5.0)                        
##  htmltools           0.3.6     2017-04-28 [1] CRAN (R 3.5.0)                        
##  htmlwidgets         1.3       2018-09-30 [1] CRAN (R 3.5.0)                        
##  httpuv              1.4.5.1   2018-12-18 [1] CRAN (R 3.5.0)                        
##  httr                1.4.0     2018-12-11 [1] CRAN (R 3.5.0)                        
##  igraph              1.2.2     2018-07-27 [1] CRAN (R 3.5.0)                        
##  IRanges             2.15.16   2018-07-18 [1] Bioconductor                          
##  iterators           1.0.10    2018-07-13 [1] CRAN (R 3.5.0)                        
##  jsonlite            1.6       2018-12-07 [1] CRAN (R 3.5.0)                        
##  KernSmooth          2.23-15   2015-06-29 [1] CRAN (R 3.5.3)                        
##  knitr               1.21      2018-12-10 [1] CRAN (R 3.5.2)                        
##  labeling            0.3       2014-08-23 [1] CRAN (R 3.5.0)                        
##  later               0.7.5     2018-09-18 [1] CRAN (R 3.5.0)                        
##  lattice             0.20-38   2018-11-04 [1] CRAN (R 3.5.3)                        
##  lazyeval            0.2.1     2017-10-29 [1] CRAN (R 3.5.0)                        
##  LearnBayes          2.15.1    2018-03-18 [1] CRAN (R 3.5.0)                        
##  lubridate           1.7.4     2018-04-11 [1] CRAN (R 3.5.0)                        
##  magrittr            1.5       2014-11-22 [1] CRAN (R 3.5.0)                        
##  maps              * 3.3.0     2018-04-03 [1] CRAN (R 3.5.0)                        
##  MASS                7.3-51.1  2018-11-01 [1] CRAN (R 3.5.3)                        
##  Matrix              1.2-15    2018-11-01 [1] CRAN (R 3.5.3)                        
##  memoise             1.1.0     2017-04-21 [1] CRAN (R 3.5.0)                        
##  mgcv                1.8-27    2019-02-06 [1] CRAN (R 3.5.3)                        
##  mime                0.6       2018-10-05 [1] CRAN (R 3.5.0)                        
##  mnormt              1.5-5     2016-10-15 [1] CRAN (R 3.5.0)                        
##  modelr              0.1.2     2018-05-11 [1] CRAN (R 3.5.0)                        
##  multtest            2.37.0    2018-06-13 [1] Bioconductor                          
##  munsell             0.5.0     2018-06-12 [1] CRAN (R 3.5.0)                        
##  mvtnorm             1.0-8     2018-05-31 [1] CRAN (R 3.5.0)                        
##  nlme                3.1-137   2018-04-07 [1] CRAN (R 3.5.3)                        
##  numDeriv            2016.8-1  2016-08-27 [1] CRAN (R 3.5.0)                        
##  permute             0.9-4     2016-09-09 [1] CRAN (R 3.5.0)                        
##  phangorn            2.4.0     2018-02-15 [1] CRAN (R 3.5.0)                        
##  phylobase           0.8.4     2018-07-06 [1] Github (fmichonneau/phylobase@0f4db8f)
##  phyloseq          * 1.25.2    2018-07-15 [1] Bioconductor                          
##  phytools          * 0.6-60    2018-09-28 [1] CRAN (R 3.5.0)                        
##  pillar              1.3.1     2018-12-15 [1] CRAN (R 3.5.0)                        
##  pkgbuild            1.0.2     2018-10-16 [1] CRAN (R 3.5.0)                        
##  pkgconfig           2.0.2     2018-08-16 [1] CRAN (R 3.5.0)                        
##  pkgload             1.0.2     2018-10-29 [1] CRAN (R 3.5.0)                        
##  plotrix             3.7-4     2018-10-03 [1] CRAN (R 3.5.0)                        
##  plyr                1.8.4     2016-06-08 [1] CRAN (R 3.5.0)                        
##  png                 0.1-7     2013-12-03 [1] CRAN (R 3.5.0)                        
##  prettyunits         1.0.2     2015-07-13 [1] CRAN (R 3.5.0)                        
##  processx            3.2.1     2018-12-05 [1] CRAN (R 3.5.0)                        
##  progress            1.2.0     2018-06-14 [1] CRAN (R 3.5.0)                        
##  promises            1.0.1     2018-04-13 [1] CRAN (R 3.5.0)                        
##  ps                  1.3.0     2018-12-21 [1] CRAN (R 3.5.0)                        
##  purrr             * 0.3.0     2019-01-27 [1] CRAN (R 3.5.2)                        
##  quadprog            1.5-5     2013-04-17 [1] CRAN (R 3.5.0)                        
##  R6                  2.3.0     2018-10-04 [1] CRAN (R 3.5.0)                        
##  Rcpp                1.0.0     2018-11-07 [1] CRAN (R 3.5.0)                        
##  readr             * 1.3.1     2018-12-21 [1] CRAN (R 3.5.0)                        
##  readxl              1.2.0     2018-12-19 [1] CRAN (R 3.5.0)                        
##  remotes             2.0.2     2018-10-30 [1] CRAN (R 3.5.0)                        
##  reshape2            1.4.3     2017-12-11 [1] CRAN (R 3.5.0)                        
##  rhdf5               2.25.7    2018-08-23 [1] Bioconductor                          
##  Rhdf5lib            1.3.2     2018-08-23 [1] Bioconductor                          
##  rlang               0.3.1     2019-01-08 [1] CRAN (R 3.5.2)                        
##  rmarkdown           1.11      2018-12-08 [1] CRAN (R 3.5.0)                        
##  rncl                0.8.3     2018-07-27 [1] CRAN (R 3.5.0)                        
##  RNeXML              2.2.0     2018-11-01 [1] CRAN (R 3.5.0)                        
##  rprojroot           1.3-2     2018-01-03 [1] CRAN (R 3.5.0)                        
##  rstudioapi          0.9.0     2019-01-09 [1] CRAN (R 3.5.2)                        
##  rvcheck             0.1.3     2018-12-06 [1] CRAN (R 3.5.0)                        
##  rvest               0.3.2     2016-06-17 [1] CRAN (R 3.5.0)                        
##  S4Vectors           0.19.19   2018-07-18 [1] Bioconductor                          
##  scales              1.0.0     2018-08-09 [1] CRAN (R 3.5.0)                        
##  scatterplot3d       0.3-41    2018-03-14 [1] CRAN (R 3.5.0)                        
##  seqinr              3.4-5     2017-08-01 [1] CRAN (R 3.5.0)                        
##  sessioninfo         1.1.1     2018-11-05 [1] CRAN (R 3.5.0)                        
##  shiny               1.2.0     2018-11-02 [1] CRAN (R 3.5.0)                        
##  sp                  1.3-1     2018-06-05 [1] CRAN (R 3.5.0)                        
##  spData              0.3.0     2019-01-07 [1] CRAN (R 3.5.2)                        
##  spdep               0.8-1     2018-11-21 [1] CRAN (R 3.5.0)                        
##  stringi             1.2.4     2018-07-20 [1] CRAN (R 3.5.0)                        
##  stringr           * 1.4.0     2019-02-10 [1] CRAN (R 3.5.2)                        
##  subplex             1.5-4     2018-04-05 [1] CRAN (R 3.5.0)                        
##  survival            2.43-3    2018-11-26 [1] CRAN (R 3.5.3)                        
##  testthat            2.0.1     2018-10-13 [1] CRAN (R 3.5.0)                        
##  tibble            * 2.0.1     2019-01-12 [1] CRAN (R 3.5.2)                        
##  tidyr             * 0.8.2     2018-10-28 [1] CRAN (R 3.5.0)                        
##  tidyselect          0.2.5     2018-10-11 [1] CRAN (R 3.5.0)                        
##  tidytree            0.2.1     2018-12-19 [1] CRAN (R 3.5.0)                        
##  tidyverse         * 1.2.1     2017-11-14 [1] CRAN (R 3.5.0)                        
##  usethis           * 1.4.0     2018-08-14 [1] CRAN (R 3.5.0)                        
##  utf8                1.1.4     2018-05-24 [1] CRAN (R 3.5.0)                        
##  uuid                0.1-2     2015-07-28 [1] CRAN (R 3.5.0)                        
##  vegan               2.5-3     2018-10-25 [1] CRAN (R 3.5.0)                        
##  withr               2.1.2     2018-03-15 [1] CRAN (R 3.5.0)                        
##  xfun                0.4       2018-10-23 [1] CRAN (R 3.5.0)                        
##  XML                 3.98-1.16 2018-08-19 [1] CRAN (R 3.5.0)                        
##  xml2                1.2.0     2018-01-24 [1] CRAN (R 3.5.0)                        
##  xtable              1.8-3     2018-08-29 [1] CRAN (R 3.5.0)                        
##  XVector             0.21.3    2018-06-23 [1] Bioconductor                          
##  yaml                2.2.0     2018-07-25 [1] CRAN (R 3.5.0)                        
##  zlibbioc            1.27.0    2018-06-13 [1] Bioconductor                          
## 
## [1] /Library/Frameworks/R.framework/Versions/3.5/Resources/library

Analyses by Marian

Marian L. Schmidt, marschmi@umich.edu, @micro_marian

July 25th, 2018

Load Libraries & Set Themes

Load FCM Data

Summary & Descriptive Statistics

Number of cells per system

Figure 1

Figure 1A: Plotted By Lake System

Figure 1B & 1D: Fraction HNA vs Productivity

Figure 4: Heatmap

Load RL Score data

Plot figure 4: RL Scores

Top 10 from Fig 4 Scores

Figure 5: Phylogenetic Tree

Prepare files for iTOL - Figure 5

How many OTUs per system?

Number of OTUs per phylum

Specific Bacterial Phyla

Phylogenetic Signal

Prepare data

Pagel’s Lambda

Discrete Character Evolution

Continuous Character Evolution

Blomberg’s K

Moran’s I

Abouheif’s Cmean

Session Information