Data Input & Cleaning Tutorial Here
# Install Packages
#install.packages("HelpersMG")
#install.packages("WGCNA")
# The GO.db package will not let me load WGCNA, so let's download it
#if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#BiocManager::install("GO.db", version = "3.8")
# Load the packages
library(HelpersMG)
library(WGCNA)
library(tidyverse)
library(phyloseq)
# Set the working directory
current_directory <- getwd()
paste("Working in directory:", current_directory)
## [1] "Working in directory: /Users/marschmi/git_repos/HNA_LNA_productivity"
# Load data
load("data/Chloroplasts_removed/ByLake_Filtering/5in10/muskegon/muskegon_5in10_physeqs.RData")
rare_muskegon_physeq_5in10_abs
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 482 taxa and 62 samples ]
## sample_data() Sample Data: [ 62 samples by 18 sample variables ]
## tax_table() Taxonomy Table: [ 482 taxa by 7 taxonomic ranks ]
# The following setting is important, do not omit.
options(stringsAsFactors = FALSE)
# Prepare CLR transformed OTU Table
clr_otu <- read.csv("data/Chloroplasts_removed/ByLake_Filtering/5in10/muskegon/muskegon_clr_otu_5in10.csv") %>%
tibble::column_to_rownames(var = "X")
# Prepare Taxonomy Table
clr_tax <- tax_table(rare_muskegon_physeq_5in10_abs)
# Prepare Metadata file
clr_metadata <-
sample_data(rare_muskegon_physeq_5in10_abs) %>%
mutate(norep_filter_name = paste(substr(Sample_16S, 1,4), substr(Sample_16S, 6, 9), sep=""),
# duplicate Sample_16S column to move it to the rownames
names = Sample_16S) %>%
tibble::column_to_rownames(var = "names")
# Create the phyloseq object
clr_physeq <- merge_phyloseq(otu_table(clr_otu, taxa_are_rows = FALSE), clr_tax, sample_data(clr_metadata))
# Read in the producutivity data
prod_data <- read.csv("data/production_data.csv") %>%
dplyr::filter(fraction == "Free" & limnion == "Surface") %>% # Select only rows that are free-living
dplyr::select(names, tot_bacprod, SD_tot_bacprod) %>% # Select relevant columns for total productivity
mutate(tot_bacprod = round(tot_bacprod, digits = 2), # Round to 2 decimals
SD_tot_bacprod = round(SD_tot_bacprod, digits = 2) ) %>%
dplyr::rename(norep_filter_name = names) %>% # Rename to match other data frame
arrange(norep_filter_name)
# Put together a dataframe with metadata and production data
meta_prod_dat <-
prod_data %>%
left_join(clr_metadata ,by = "norep_filter_name") %>%
mutate(names = Sample_16S) %>%
tibble::column_to_rownames(var = "names")
###### FINAL PHYLOSEQ OBJECT
prod_clr_physeq_orig <- subset_samples(clr_physeq, Sample_16S %in% meta_prod_dat$Sample_16S)
# Overwrite the sample data to include productivity data
prod_clr_physeq <- merge_phyloseq(otu_table(prod_clr_physeq_orig), tax_table(prod_clr_physeq_orig), sample_data(meta_prod_dat))
prod_clr_physeq
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 482 taxa and 23 samples ]
## sample_data() Sample Data: [ 23 samples by 21 sample variables ]
## tax_table() Taxonomy Table: [ 482 taxa by 7 taxonomic ranks ]
# Pull out the information and clean up the data
prod_clr_otu <- otu_table(prod_clr_physeq)
datOTUClr <- as.data.frame(prod_clr_otu)
# Metadata
prod_clr_dat <- data.frame(sample_data(prod_clr_physeq))
# Select only the numeric traits
datContinuous <-
prod_clr_dat %>%
dplyr::select(c(tot_bacprod, SD_tot_bacprod, Total.cells, Total.count.sd,
HNA.cells, HNA.sd, LNA.cells, LNA.sd, Total_Sequences))
head(datContinuous)
## tot_bacprod SD_tot_bacprod Total.cells Total.count.sd HNA.cells
## MBRE1F714 33.86 1.30 10104066 55102.71 2724102
## MBRE1F914 24.72 0.36 7206483 65178.68 1928397
## MBRE1F715 11.87 2.15 7420899 70408.32 1343202
## MBRE2F515 29.34 10.78 4326956 96174.94 1739097
## MBRE2F915 22.23 0.83 4895228 22853.30 1264624
## MDPE1F514 43.30 4.65 4412738 112621.94 2396948
## HNA.sd LNA.cells LNA.sd Total_Sequences
## MBRE1F714 57003.29 7379964 57715.92 33038
## MBRE1F914 42404.03 5278086 54019.71 24538
## MBRE1F715 44613.63 6077697 31710.77 17500
## MBRE2F515 70451.47 2587859 31361.59 27722
## MBRE2F915 16800.30 3630604 14092.64 21845
## MDPE1F514 114096.60 2015790 100625.07 104975
Below is an analysis to find out whether there are outliers. Thankfully, there are none!
# Check for OTUs and samples with too many missing values
good_otus <- goodSamplesGenes(datOTUClr, verbose = 3);
## Flagging genes and samples with too many missing values...
## ..step 1
good_otus$allOK
## [1] TRUE
# Search the data for outliers via clustering
#sampleTree <- hclust(dist(datOTUClr), method = "average")
#sampleTree
# Plot it
#sizeGrWindow(12, 9) # Set plotting window to be 12 by 9 inches
#par(cex = 1, mar = c(0,4,2,0))
#plot(sampleTree, main = "Sample Clustering: No Outliers",
# sub = "", xlab = "", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2)
# Clust contains the samples we want to keep
paste("There are", ntaxa(prod_clr_physeq), "genes in the dataset.")
## [1] "There are 482 genes in the dataset."
paste("There are", nsamples(prod_clr_physeq), "samples in the dataset.")
## [1] "There are 23 samples in the dataset."
# Re-cluster samples
sampleTree2 <- hclust(dist(datOTUClr), method = "average")
# Convert traits to a color representation: white means low, red means high, grey means missing entry
traitColors <- numbers2colors(datContinuous, signed = FALSE);
# Plot the sample dendrogram and the colors underneath.
plotDendroAndColors(sampleTree2, traitColors,
groupLabels = names(datContinuous),
main = "Sample dendrogram and trait heatmap")
Figure: Clustering dendrogram of samples based on their Euclidean distance.
save(prod_clr_physeq, datOTUClr, datContinuous, file = "data/WGCNA/WGCNA_data.RData")
Automatic Netrowk Construction Tutorial Here
# Choose a set of soft-thresholding powers
powers <- c(c(1:10), seq(from = 12, to=20, by=2))
# Call the network topology analysis function
sft <- pickSoftThreshold(datOTUClr, powerVector = powers, verbose = 5)
## pickSoftThreshold: will use block size 482.
## pickSoftThreshold: calculating connectivity for given powers...
## ..working on genes 1 through 482 of 482
## Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1 1 0.225 -0.69 0.73300 130.000 127.0000 214.0
## 2 2 0.834 -1.10 0.92500 52.200 46.6000 125.0
## 3 3 0.925 -1.22 0.93700 25.900 20.4000 84.5
## 4 4 0.943 -1.26 0.93100 14.700 9.9900 62.4
## 5 5 0.910 -1.31 0.88500 9.200 5.2700 49.0
## 6 6 0.813 -1.42 0.76400 6.200 2.9300 40.1
## 7 7 0.896 -1.33 0.86600 4.430 1.6900 33.8
## 8 8 0.939 -1.26 0.93400 3.310 1.0200 29.2
## 9 9 0.893 -1.28 0.87600 2.560 0.6470 25.8
## 10 10 0.206 -2.23 -0.00829 2.050 0.4300 23.1
## 11 12 0.839 -1.25 0.81100 1.400 0.2010 19.2
## 12 14 0.187 -1.86 -0.04490 1.020 0.0960 16.4
## 13 16 0.843 -1.20 0.82200 0.788 0.0505 14.4
## 14 18 0.165 -1.62 -0.04570 0.629 0.0267 12.9
## 15 20 0.197 -1.71 -0.03110 0.517 0.0152 11.6
# Plot the results:
sizeGrWindow(9, 5)
par(mfrow = c(1,2))
# Set some parameters
cex1 = 0.9
# Scale-free topology fit index as a function of the soft-thresholding power
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
main = paste("Scale independence"))
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3]) * sft$fitIndices[,2],
labels=powers,cex=cex1,col="red")
# this line corresponds to using an R^2 cut-off of h
abline(h=0.90,col="red")
# Mean connectivity as a function of the soft-thresholding power
plot(sft$fitIndices[,1], sft$fitIndices[,5],
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
# Turn adjacency into topological overlap matrix (TOM)
adjacency <- adjacency(datOTUClr, power = 3)
TOMadj <- TOMsimilarity(adjacency)
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
dissTOMadj <- 1- TOMadj
# Clustering using TOM
# Call the hierarchical clustering function
hclustGeneTree <- hclust(as.dist(dissTOMadj), method = "average")
# Plot the resulting clustering tree (dendogram)
sizeGrWindow(12, 9)
plot(hclustGeneTree, xlab = "", sub = "",
main = "Gene Clustering on TOM-based disssimilarity",
labels = FALSE, hang = 0.04)
# Make the modules larger, so set the minimum higher
minModuleSize <- 12
# Module ID using dynamic tree cut
dynamicMods <- cutreeDynamic(dendro = hclustGeneTree,
distM = dissTOMadj,
deepSplit = 2, pamRespectsDendro = FALSE,
minClusterSize = minModuleSize)
## ..cutHeight not given, setting it to 0.964 ===> 99% of the (truncated) height range in dendro.
## ..done.
table(dynamicMods)
## dynamicMods
## 1 2 3 4 5
## 315 61 38 36 32
# Convert numeric lables into colors
dynamicColors <- labels2colors(dynamicMods)
table(dynamicColors)
## dynamicColors
## blue brown green turquoise yellow
## 61 38 32 315 36
# Plot the dendrogram and colors underneath
sizeGrWindow(8,6)
plotDendroAndColors(hclustGeneTree, dynamicColors, "Dynamic Tree Cut",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05,
main = "Gene dendrogram and module colors")
# Calculate eigengenes
dynamic_MEList <- moduleEigengenes(datOTUClr, colors = dynamicColors)
dynamic_MEs <- dynamic_MEList$eigengenes
# Calculate dissimilarity of module eigengenes
dynamic_MEDiss <- 1-cor(dynamic_MEs)
dynamic_METree <- hclust(as.dist(dynamic_MEDiss))
# Plot the hclust
sizeGrWindow(7,6)
plot(dynamic_METree, main = "Dynamic Clustering of module eigengenes",
xlab = "", sub = "")
######################## MERGE SIMILAR MODULES
dynamic_MEDissThres <- 0.25
# Plot the cut line
#abline(h = dynamic_MEDissThres, col = "red")
# Call an automatic merging function
merge_dynamic_MEDs <- mergeCloseModules(datOTUClr, dynamicColors, cutHeight = dynamic_MEDissThres, verbose = 3)
## mergeCloseModules: Merging modules whose distance is less than 0.25
## multiSetMEs: Calculating module MEs.
## Working on set 1 ...
## moduleEigengenes: Calculating 5 module eigengenes in given set.
## Calculating new MEs...
## multiSetMEs: Calculating module MEs.
## Working on set 1 ...
## moduleEigengenes: Calculating 5 module eigengenes in given set.
# The Merged Colors
dynamic_mergedColors <- merge_dynamic_MEDs$colors
# Eigen genes of the new merged modules
mergedMEs <- merge_dynamic_MEDs$newMEs
mergedMEs
## MEblue MEgreen MEbrown MEturquoise MEyellow
## MBRE1F714 -0.07477574 -0.135290324 0.416986081 -0.17757671 -0.13452258
## MBRE1F914 0.14871655 0.501261302 -0.150849213 -0.13435389 -0.27162703
## MBRE1F715 -0.11141899 -0.044305166 0.041025250 -0.10862524 0.29468872
## MBRE2F515 -0.13225971 -0.074095503 -0.056890112 0.26052456 0.29750960
## MBRE2F915 0.40051486 -0.022608041 -0.090142177 -0.16335447 0.05200960
## MDPE1F514 -0.15418589 -0.091568612 -0.145805291 0.37591018 -0.11844501
## MDPE1F714 -0.14999524 -0.225154636 0.366447475 -0.16630213 -0.28959076
## MDPE1F914 0.13727902 0.592327410 -0.134733998 -0.14500424 -0.26859169
## MDPE1F715 -0.15252747 -0.125455030 0.078769473 -0.12329157 0.15924140
## MDPE2F515 -0.10125151 0.004176903 -0.005615171 0.25589205 0.34611912
## MDPE2F915 0.43111546 -0.132479691 -0.189948738 -0.16460255 -0.06387725
## MINE1F514 -0.10813300 -0.054487484 -0.127827478 0.35762970 0.02639579
## MINE1F714 -0.18062524 -0.186213416 0.316822686 -0.12653316 0.03401600
## MINE1F914 -0.02528633 0.190380196 -0.145853239 -0.03852416 -0.16041303
## MINE1F715 -0.16580270 -0.137093351 -0.037690616 -0.02880182 0.16812382
## MINE2F515 -0.12461981 -0.053974187 -0.128431941 0.26389711 0.17673427
## MINE2F915 0.31545737 -0.117989551 -0.191235301 -0.09072100 0.09146613
## MOTE1F514 -0.17484364 -0.111442589 -0.150322176 0.35322592 -0.17748902
## MOTE1F714 -0.13739765 -0.042859458 0.579147918 -0.17984627 -0.26019467
## MOTE1F914 0.18372613 0.368564362 -0.105190531 -0.17499813 -0.29648555
## MOTE1F715 -0.12900770 -0.082537395 0.014542652 -0.13484198 0.24872978
## MOTE2F515 -0.13328496 -0.089015640 -0.056458549 0.28200293 0.23995399
## MOTE2F915 0.43860620 0.069859900 -0.096747004 -0.19170514 -0.09375166
table(dynamic_mergedColors)
## dynamic_mergedColors
## blue brown green turquoise yellow
## 61 38 32 315 36
sizeGrWindow(12,9)
plotDendroAndColors(hclustGeneTree, cbind(dynamicColors, dynamic_mergedColors),
c("Dynamic Tree Cut", "Merged dynamic"),
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)
# Rename Module Colors
moduleColors <- dynamic_mergedColors
# Construct numerical labels corresponding to the colors
colorOrder <- c("grey", standardColors(50))
moduleLabels <- match(moduleColors, colorOrder)-1
MEs <- mergedMEs
save(MEs, moduleLabels, moduleColors, hclustGeneTree, file = "data/WGCNA/dynamic-prodOTU-02-networkConstruction.RData")
Figure: Clustering dendrogram of genes obtained in the single-block analysis, together with module colors determined in the single-block analysis and the module colors determined in the block-wise analysis. There is excellent agreement between the single-block and block-wise network construction and module detection.
Relating Modules to Traits Tutorial Here
# define numbers of genes and samples
nOTUs <- ncol(datOTUClr)
nSamples <- nrow(datOTUClr)
# Recalculate MEs with color labels
MEs0 <- moduleEigengenes(datOTUClr, moduleColors)$eigengenes
MEs <- orderMEs(MEs0)
names(MEs) <- substring(names(MEs), 3)
moduleTraitCor <- cor(MEs, datContinuous, use = "p")
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)
# PLOT
sizeGrWindow(10,6)
textMatrix <- paste(signif(moduleTraitCor, 2), "\n(", signif(moduleTraitPvalue, 1), ")", sep = "")
dim(textMatrix) <- dim(moduleTraitCor)
par(mar = c(6, 8.5, 3, 3))
# Display the correlation values within a heatmap
labeledHeatmap(Matrix = moduleTraitCor,
xLabels = names(datContinuous),
yLabels = names(MEs),
ySymbols = names(MEs),
colorLabels = FALSE,
colors = blueWhiteRed(50),
textMatrix = textMatrix,
setStdMargins = FALSE,
cex.text = 0.5,
zlim = c(-1,1),
main = paste("Module-trait Relationships"))
Figure: Relationships of consensus module eignegenes and traits of samples collected from Muskegon Lake. Each row in the table corresponds to a consensus module (of OTUs), and each column is a trait. Numbers in the table report the correlations of the corresponding module eigengenes and traits, with a p-value printed below in parentheses. The table is color coded by correlation according to the legend.
We are interested in classifying the modules that are associated with bacterial production. So therefore, the trait of interest is tot_bacprod.
From the plot above, we can see that the following modules as significantly (pvalues in the parentheses):
Goal: Quantify associations of individual OTU with trait of interest (tot_bacprod)
How?
# Define variable weight containing the weight column of datTrait
bacprod <- as.data.frame(datContinuous$tot_bacprod)
names(bacprod) <- "bacprod" # rename
# Names (colors of the modules
#modNames <- substring(names(MEs), 3) # Remove the first two letters in string
# Calculate the correlations between modules
geneModuleMembership <- as.data.frame(WGCNA::cor(datOTUClr, MEs, use = "p"))
# What are the p-values for each correlation?
MMPvalue <- as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples))
# What's the correlation for the trait: bacterial production?
geneTraitSignificance <- as.data.frame(cor(datOTUClr, bacprod, use = "p"))
# What are the p-values for each correlation?
GSPvalue <- as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples = nSamples))
names(geneTraitSignificance) <- paste("GS.", names(bacprod), sep = "")
names(GSPvalue) <- paste("p.GS.", names(bacprod), sep = "")
Using GS and MM, let’s identify genes with a high significance for bacterial production as well as high module membership in interesting modules.
# Fix the names so that they match the actual color
#names(geneModuleMembership) <- substring(names(geneModuleMembership), 3) # Remove the first two letters in string
modNames <- names(geneModuleMembership)
par(mfrow = c(2,3))
# Initialize for loop
# NEED: modNames
for (i in names(geneModuleMembership)) {
# Pull out the module we're working on
module <- i
print(module)
# Find the index in the column of the dataframe
column <- match(module, modNames)
#print(column)
# Pull out the Gene Significance vs module membership of the module
moduleGenes = moduleColors == module
vecOTUnames = rownames(geneTraitSignificance)
print(paste("There are ", length(vecOTUnames[moduleGenes]), " OTUs in the ", module, " module.", sep = ""))
print(vecOTUnames[moduleGenes])
# Make the plot
verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
abs(geneTraitSignificance[moduleGenes, 1]),
xlab = paste("Module Membership in", module, "module"),
ylab = "Gene significance for bacterial production",
main = paste("Module membership vs. gene significnace \n"),
cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)
}
## [1] "blue"
## [1] "There are 61 OTUs in the blue module."
## [1] "Otu000034" "Otu000058" "Otu000067" "Otu000087" "Otu000091"
## [6] "Otu000095" "Otu000116" "Otu000122" "Otu000127" "Otu000141"
## [11] "Otu000149" "Otu000161" "Otu000193" "Otu000232" "Otu000238"
## [16] "Otu000242" "Otu000244" "Otu000268" "Otu000269" "Otu000292"
## [21] "Otu000312" "Otu000338" "Otu000339" "Otu000358" "Otu000359"
## [26] "Otu000386" "Otu000413" "Otu000423" "Otu000438" "Otu000472"
## [31] "Otu000481" "Otu000503" "Otu000505" "Otu000519" "Otu000522"
## [36] "Otu000568" "Otu000625" "Otu000654" "Otu000673" "Otu000679"
## [41] "Otu000701" "Otu000729" "Otu000730" "Otu000753" "Otu000843"
## [46] "Otu000905" "Otu000923" "Otu000961" "Otu001066" "Otu001084"
## [51] "Otu001137" "Otu001166" "Otu001180" "Otu001266" "Otu001298"
## [56] "Otu001366" "Otu001402" "Otu001650" "Otu001931" "Otu002026"
## [61] "Otu002112"
## [1] "green"
## [1] "There are 32 OTUs in the green module."
## [1] "Otu000033" "Otu000060" "Otu000081" "Otu000131" "Otu000176"
## [6] "Otu000322" "Otu000332" "Otu000344" "Otu000449" "Otu000517"
## [11] "Otu000521" "Otu000593" "Otu000612" "Otu000613" "Otu000615"
## [16] "Otu000644" "Otu000665" "Otu000711" "Otu000766" "Otu000796"
## [21] "Otu000924" "Otu000938" "Otu001030" "Otu001100" "Otu001120"
## [26] "Otu001171" "Otu001210" "Otu001293" "Otu001476" "Otu001487"
## [31] "Otu001691" "Otu002041"
## [1] "brown"
## [1] "There are 38 OTUs in the brown module."
## [1] "Otu000041" "Otu000044" "Otu000062" "Otu000126" "Otu000190"
## [6] "Otu000195" "Otu000205" "Otu000212" "Otu000240" "Otu000246"
## [11] "Otu000267" "Otu000451" "Otu000462" "Otu000636" "Otu000637"
## [16] "Otu000670" "Otu000680" "Otu000682" "Otu000715" "Otu000743"
## [21] "Otu000782" "Otu000821" "Otu000837" "Otu000860" "Otu000878"
## [26] "Otu000888" "Otu000894" "Otu000917" "Otu001086" "Otu001136"
## [31] "Otu001158" "Otu001246" "Otu001464" "Otu001520" "Otu001574"
## [36] "Otu001602" "Otu001831" "Otu001913"
## [1] "turquoise"
## [1] "There are 315 OTUs in the turquoise module."
## [1] "Otu000001" "Otu000005" "Otu000006" "Otu000007" "Otu000009"
## [6] "Otu000010" "Otu000014" "Otu000016" "Otu000017" "Otu000018"
## [11] "Otu000019" "Otu000020" "Otu000022" "Otu000023" "Otu000025"
## [16] "Otu000026" "Otu000027" "Otu000029" "Otu000030" "Otu000038"
## [21] "Otu000040" "Otu000042" "Otu000043" "Otu000047" "Otu000048"
## [26] "Otu000049" "Otu000050" "Otu000057" "Otu000059" "Otu000061"
## [31] "Otu000063" "Otu000065" "Otu000066" "Otu000068" "Otu000071"
## [36] "Otu000073" "Otu000074" "Otu000075" "Otu000076" "Otu000078"
## [41] "Otu000079" "Otu000080" "Otu000082" "Otu000083" "Otu000084"
## [46] "Otu000086" "Otu000090" "Otu000093" "Otu000094" "Otu000096"
## [51] "Otu000098" "Otu000099" "Otu000100" "Otu000101" "Otu000103"
## [56] "Otu000105" "Otu000106" "Otu000107" "Otu000108" "Otu000109"
## [61] "Otu000113" "Otu000115" "Otu000119" "Otu000120" "Otu000123"
## [66] "Otu000124" "Otu000128" "Otu000132" "Otu000133" "Otu000136"
## [71] "Otu000137" "Otu000139" "Otu000144" "Otu000147" "Otu000150"
## [76] "Otu000151" "Otu000152" "Otu000154" "Otu000158" "Otu000159"
## [81] "Otu000160" "Otu000163" "Otu000165" "Otu000167" "Otu000168"
## [86] "Otu000169" "Otu000171" "Otu000172" "Otu000173" "Otu000175"
## [91] "Otu000179" "Otu000182" "Otu000183" "Otu000184" "Otu000187"
## [96] "Otu000189" "Otu000191" "Otu000192" "Otu000194" "Otu000196"
## [101] "Otu000203" "Otu000204" "Otu000207" "Otu000210" "Otu000213"
## [106] "Otu000216" "Otu000217" "Otu000218" "Otu000219" "Otu000221"
## [111] "Otu000222" "Otu000223" "Otu000224" "Otu000226" "Otu000227"
## [116] "Otu000230" "Otu000235" "Otu000247" "Otu000248" "Otu000249"
## [121] "Otu000253" "Otu000255" "Otu000257" "Otu000259" "Otu000261"
## [126] "Otu000262" "Otu000263" "Otu000264" "Otu000266" "Otu000271"
## [131] "Otu000273" "Otu000275" "Otu000277" "Otu000279" "Otu000283"
## [136] "Otu000284" "Otu000285" "Otu000289" "Otu000290" "Otu000291"
## [141] "Otu000294" "Otu000296" "Otu000297" "Otu000298" "Otu000302"
## [146] "Otu000305" "Otu000309" "Otu000313" "Otu000316" "Otu000317"
## [151] "Otu000319" "Otu000320" "Otu000323" "Otu000325" "Otu000328"
## [156] "Otu000330" "Otu000336" "Otu000340" "Otu000343" "Otu000346"
## [161] "Otu000349" "Otu000355" "Otu000356" "Otu000361" "Otu000373"
## [166] "Otu000377" "Otu000379" "Otu000384" "Otu000385" "Otu000388"
## [171] "Otu000389" "Otu000390" "Otu000392" "Otu000399" "Otu000400"
## [176] "Otu000402" "Otu000403" "Otu000407" "Otu000408" "Otu000409"
## [181] "Otu000410" "Otu000414" "Otu000416" "Otu000418" "Otu000421"
## [186] "Otu000422" "Otu000426" "Otu000427" "Otu000428" "Otu000429"
## [191] "Otu000434" "Otu000440" "Otu000445" "Otu000453" "Otu000456"
## [196] "Otu000458" "Otu000459" "Otu000460" "Otu000469" "Otu000470"
## [201] "Otu000471" "Otu000474" "Otu000478" "Otu000482" "Otu000487"
## [206] "Otu000494" "Otu000496" "Otu000506" "Otu000509" "Otu000511"
## [211] "Otu000514" "Otu000515" "Otu000535" "Otu000536" "Otu000539"
## [216] "Otu000547" "Otu000559" "Otu000563" "Otu000574" "Otu000578"
## [221] "Otu000581" "Otu000587" "Otu000590" "Otu000604" "Otu000605"
## [226] "Otu000614" "Otu000623" "Otu000631" "Otu000645" "Otu000651"
## [231] "Otu000655" "Otu000660" "Otu000669" "Otu000672" "Otu000681"
## [236] "Otu000708" "Otu000712" "Otu000716" "Otu000722" "Otu000725"
## [241] "Otu000745" "Otu000748" "Otu000749" "Otu000756" "Otu000764"
## [246] "Otu000770" "Otu000771" "Otu000774" "Otu000778" "Otu000779"
## [251] "Otu000787" "Otu000799" "Otu000815" "Otu000827" "Otu000832"
## [256] "Otu000833" "Otu000836" "Otu000848" "Otu000851" "Otu000865"
## [261] "Otu000885" "Otu000893" "Otu000898" "Otu000899" "Otu000900"
## [266] "Otu000904" "Otu000931" "Otu000932" "Otu000937" "Otu000977"
## [271] "Otu000985" "Otu001018" "Otu001021" "Otu001023" "Otu001059"
## [276] "Otu001067" "Otu001078" "Otu001087" "Otu001089" "Otu001124"
## [281] "Otu001132" "Otu001142" "Otu001183" "Otu001200" "Otu001232"
## [286] "Otu001233" "Otu001236" "Otu001242" "Otu001256" "Otu001267"
## [291] "Otu001273" "Otu001300" "Otu001317" "Otu001329" "Otu001330"
## [296] "Otu001368" "Otu001387" "Otu001439" "Otu001451" "Otu001452"
## [301] "Otu001473" "Otu001570" "Otu001580" "Otu001664" "Otu001672"
## [306] "Otu001697" "Otu001767" "Otu001799" "Otu001835" "Otu001846"
## [311] "Otu001959" "Otu002142" "Otu002234" "Otu002374" "Otu002481"
## [1] "yellow"
## [1] "There are 36 OTUs in the yellow module."
## [1] "Otu000004" "Otu000011" "Otu000012" "Otu000024" "Otu000036"
## [6] "Otu000046" "Otu000053" "Otu000064" "Otu000070" "Otu000085"
## [11] "Otu000088" "Otu000104" "Otu000112" "Otu000135" "Otu000143"
## [16] "Otu000162" "Otu000293" "Otu000335" "Otu000412" "Otu000425"
## [21] "Otu000454" "Otu000516" "Otu000523" "Otu000595" "Otu000607"
## [26] "Otu000624" "Otu000810" "Otu000857" "Otu000870" "Otu001126"
## [31] "Otu001319" "Otu001327" "Otu001345" "Otu001480" "Otu002189"
## [36] "Otu002831"
Figure : Above is a scatterplot Gene Significance (GS) for bacterial production vs. Module Membership (MM) in the each of the different modules.
Modules with a high & significant correlations:
for (i in names(geneModuleMembership)) {
# Pull out the module we're working on
module <- i
# Find the index in the column of the dataframe
column <- match(module, modNames)
# Pull out the Gene Significance vs module membership of the module
moduleGenes = moduleColors == module
vecOTUnames = rownames(geneTraitSignificance)
print(paste("There are ", length(vecOTUnames[moduleGenes]), " OTUs in the ", module, " module.", sep = ""))
# NOTE: This makes hidden variables with the OTU names: brown_OTUs, yellow_OTUs
assign(paste(module, "_OTUs", sep = ""), vecOTUnames[moduleGenes])
}
## [1] "There are 61 OTUs in the blue module."
## [1] "There are 32 OTUs in the green module."
## [1] "There are 38 OTUs in the brown module."
## [1] "There are 315 OTUs in the turquoise module."
## [1] "There are 36 OTUs in the yellow module."
From the plot above, it looks like the blue module has the largest positive correlation with bacterial production.
Which bacterial taxa make up this module?
We just identified modules with high association with the trait of interest (i.e. brown & red, but not magenta or midnightblue), now let’s merge this information with gene annotation information and write out a file that summarizes the results.
# Combine pval, taxonomy, module membership, and gene significance into one dataframe
# Prepare pvalue df
GSpval <- GSPvalue %>%
tibble::rownames_to_column(var = "OTU")
# Prepare taxonomy df
tax_df <- tax_table(prod_clr_physeq) %>%
data.frame() %>%
tibble::rownames_to_column(var = "OTU") %>%
rename(Domain = Rank1, Phylum = Rank2, Class = Rank3, Order = Rank4, Family = Rank5, Genus = Rank6, Species = Rank7)
# Prepare module membership df
# Modify column names
#names(geneModuleMembership) <- substring(names(geneModuleMembership), 3)
gMM_df <- geneModuleMembership %>%
tibble::rownames_to_column(var = "OTU") %>%
gather(key = "moduleColor", value = "moduleMemberCorr", -OTU)
# Prepare gene significance df
GS_bacprod_df <- geneTraitSignificance %>%
data.frame() %>%
tibble::rownames_to_column(var = "OTU")
# Put everything together
allData_df <- gMM_df %>%
left_join(GS_bacprod_df, by = "OTU") %>%
left_join(GSpval, by = "OTU") %>%
left_join(tax_df, by = "OTU")
# Write a file
write.csv(allData_df, file = "data/WGCNA/allData_df_WGCNA.csv")
# Calculate topological overlap.
# Calculate during module detection, but calculating again here:
dissTOM <- 1-TOMsimilarityFromExpr(datOTUClr, power = 3)
## TOM calculation: adjacency..
## ..will not use multithreading.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
# Transform with a power to make moderately strong connectsion more visible in the heatmap
plotTOM <- dissTOM^7
# Set diagnol to NA for a nicer plot
diag(plotTOM) <- NA
######################## ALL GENES - even though it looks like subsampling
nSelect = 482
nGenes = ncol(datOTUClr)
nSamples = nrow(datOTUClr)
# For reproducibility, we set the random seed
set.seed(10);
select = sample(nGenes, size = nSelect);
selectTOM = dissTOM[select, select];
# There’s no simple way of restricting a clustering tree to a subset of genes, so we must re-cluster.
selectTree = hclust(as.dist(selectTOM), method = "average")
selectColors = moduleColors[select];
# Open a graphical window
sizeGrWindow(9,9)
# Taking the dissimilarity to a power, say 10, makes the plot more informative by effectively changing
# the color palette; setting the diagonal to NA also improves the clarity of the plot
plotDiss = selectTOM^7;
diag(plotDiss) = NA;
TOMplot(plotDiss, selectTree, selectColors, main = "Network heatmap plot, selected genes")
Figure: Visualizing the OTU network using a heatmap plot. The heatmap depicts the Topological Overlap Matrix (TOM) among all OTUs in the analysis. Light color represents low overlap and progressively darker red color represents higher overlap. Blocks of darker colors along the diagonal are the modules. The gene dendrogram and module assignment are also shown along the left side and the top.
# Reclaculate module eigengenes
#MEs = moduleEigengenes(datOTUClr, moduleColors)$eigengenes
# Isolate weight from the clinical traits
bacprod <- as.data.frame(datContinuous$tot_bacprod)
names(bacprod) = "bacprod"
# Add the weight to existing module eigengenes
MET <- orderMEs(cbind(MEs, bacprod))
#names(MET) <- substring(names(MET), 3)
# Plot the dendogram
sizeGrWindow(6,6)
par(cex = 1.0)
plotEigengeneNetworks(MET, "Eigengene dendogram", marDendro = c(0,4,2,0),
plotHeatmaps = FALSE)
# Plot the heatmap
par(cex = 1.0)
plotEigengeneNetworks(MET, "Eigengene adjacency heatmap", marHeatmap = c(3,4,2,2),
plotDendograms = TRUE, xLabeles = 90)
Figure: Visualization of the eigengene network representing the relationships among the modules and bacterial production. Left panel shows a hierarchical clustering dendrogram of the eigengenes in which the dissimilarity of eigengenes EI , EJ is given by 1 − cor(EI , EJ ). The heatmap in the right panel shows the eigengene adjacency AIJ = (1 + cor(EI , EJ ))/2.
library(DT)
# Module Membership
mm_df <- geneModuleMembership %>%
tibble::rownames_to_column(var = "OTU") %>%
mutate(blue = round(abs(blue), digits = 3),
green = round(abs(green), digits = 3),
brown = round(abs(brown), digits = 3),
turquoise = round(abs(turquoise), digits = 3),
yellow = round(abs(yellow), digits = 3)) %>%
left_join(tax_df, by = "OTU")
# OTUs from Final figure
dplyr::filter(mm_df, OTU == "Otu000173")
## OTU blue green brown turquoise yellow Domain Phylum
## 1 Otu000173 0.411 0.175 0.176 0.496 0.052 Bacteria Bacteroidetes
## Class Order Family Genus Species
## 1 Flavobacteriia Flavobacteriales bacII bacII-A Unclassified
dplyr::filter(mm_df, OTU == "Otu000029")
## OTU blue green brown turquoise yellow Domain Phylum
## 1 Otu000029 0.463 0.351 0.044 0.616 0.022 Bacteria Bacteroidetes
## Class Order Family Genus Species
## 1 Cytophagia Cytophagales bacIII bacIII-B Algor
# These OTUs should be in the blue modules:
dplyr::filter(mm_df, OTU %in% blue_OTUs) %>%
datatable(options = list(pageLength = 10), caption = 'List of OTUs in blue module')
# Green OTUs
dplyr::filter(mm_df, OTU %in% green_OTUs) %>%
datatable(options = list(pageLength = 10), caption = 'List of OTUs in green module')
# brown
dplyr::filter(mm_df, OTU %in% brown_OTUs) %>%
datatable(options = list(pageLength = 10), caption = 'List of OTUs in brown module')
# turquoise
dplyr::filter(mm_df, OTU %in% turquoise_OTUs) %>%
datatable(options = list(pageLength = 10), caption = 'List of OTUs in turquoise module')
# yellow
dplyr::filter(mm_df, OTU %in% yellow_OTUs) %>%
datatable(options = list(pageLength = 10), caption = 'List of OTUs in yellow module')