Step 1. Meta-transcriptomics

Load all the packages used in the analysis

library("stringr")
library("dplyr")
library("DESeq2")
library("ggplot2")
library("tibble")
library("AnnotationHub")
library("AnnotationDbi")
library("readr")
library("dplyr")
library("clusterProfiler")
library("GenomicFeatures")
library("GO.db")
library("BiocParallel")
library("DT")
library("WGCNA")
plot_labeling_size <- 15
prefix <- "t"

import data and intial preprocessing

#input raw metatranscriptomics data
setwd("/Users/shashankgupta/Desktop/ImprovAFish/github/ImprovaFish/MetaMetaTranscriptomics/")
dds_metatranscriptomics <- readRDS("/Users/shashankgupta/Desktop/ImprovAFish/github/ImprovaFish/MetaMetaTranscriptomics/dds_metatranscriptomics.rds")
dim(dds_metatranscriptomics)
## [1] 117261    151
dds_metatranscriptomics <- dds_metatranscriptomics[ , dds_metatranscriptomics$TimePoint != "T0"]
keep <- rowSums(counts(dds_metatranscriptomics) >=5) >= 5
dds_metatranscriptomics <- dds_metatranscriptomics[keep,]
dim(dds_metatranscriptomics)
## [1] 73128   139
metatranscriptomics <- t(assay(vst(dds_metatranscriptomics)))

WGCNA analysis

dim(metatranscriptomics) %>% paste(c("Samples", "metatranscriptomics"))
powers <- c(c(1:10), seq(from = 12, to=20, by=2))
sft <- pickSoftThreshold(metatranscriptomics, 
                         powerVector = powers, 
                         verbose = 1, 
                         networkType = "signed",
                         corFn= "bicor")
saveRDS(sft, "/Users/shashankgupta/Desktop/ImprovAFish/github/ImprovaFish/OmicsIntegration/sft.rds")
sft <- readRDS("/Users/shashankgupta/Desktop/ImprovAFish/github/ImprovaFish/OmicsIntegration/sft.rds")
idx <- min(which((-sign(sft$fitIndices[,3])*sft$fitIndices[,2]) > 0.90))
if(is.infinite(idx)){
  idx <- min(which((-sign(sft$fitIndices[,3])*sft$fitIndices[,2]) > 0.80))
  if(!is.infinite(idx)){
    st <- sft$fitIndices[idx,1]
  } else{
    idx <- which.max(-sign(sft$fitIndices[,3])*sft$fitIndices[,2])
    st <- sft$fitIndices[idx,1]
  }
} else{
  st <- sft$fitIndices[idx,1]
}

data.frame(Indices = sft$fitIndices[,1],
           sfApprox = -sign(sft$fitIndices[,3])*sft$fitIndices[,2]) %>% 
  ggplot() + 
  geom_hline(yintercept = 0.9, color = "red", alpha = 0.6) + # corresponds to R^2 cut-off of 0.9
  geom_hline(yintercept = 0.8, color = "red", alpha = 0.2) + # corresponds to R^2 cut-off of 0.8
  geom_line(aes(x = Indices, y = sfApprox), color = "red", alpha = 0.1, size = 2.5) +
  geom_text(mapping = aes(x = Indices, y = sfApprox, label = Indices), color = "red", size = 4) +
  ggtitle("Scale independence") +
  xlab("Soft Threshold (power)") +
  ylab("SF Model Fit,signed R^2") +
  xlim(1,20) +
  ylim(-1,1) + theme_bw() +
  geom_segment(aes(x = st, y = 0.25, xend = st, yend = sfApprox[idx]-0.05), 
               arrow = arrow(length = unit(0.2,"cm")), 
               size = 0.5)-> scale_independence_plot 



data.frame(Indices = sft$fitIndices[,1],
           meanApprox = sft$fitIndices[,5]) %>% 
  ggplot() + 
  geom_line(aes(x = Indices, y = meanApprox), color = "red", alpha = 0.1, size = 2.5) +
  geom_text(mapping = aes(x = Indices, y = meanApprox, label = Indices), color = "red", size = 4) +
  xlab("Soft Threshold (power)") +
  ylab("Mean Connectivity") +theme_bw() +
  geom_segment(aes(x = st-0.4, 
                   y = sft$fitIndices$mean.k.[idx], 
                   xend = 0, 
                   yend = sft$fitIndices$mean.k.[idx]),
               arrow = arrow(length = unit(0.2,"cm")), 
               size = 0.4) +
  ggtitle(paste0("Mean connectivity: ", 
                 round(sft$fitIndices$mean.k.[idx],2))) -> mean_connectivity_plot


cowplot::plot_grid(scale_independence_plot, mean_connectivity_plot, ncol = 2, align = "h", labels = c("A", "B"), label_size = plot_labeling_size) -> si_mc_plot

si_mc_plot

modules.omics.MetaTrans <- blockwiseModules(metatranscriptomics, 
                                    power = st, 
                                    networkType = "signed", 
                                    TOMType = "signed",
                                    corType = "bicor",
                                    #maxPOutliers = 0.05,
                                    #deepSplit = 4, # Default 2
                                    minModuleSize = 21,           # 30
                                    #minCoreKME = 0.5,            # Default 0.5
                                    #minCoreKMESize = 2,          # Default minModuleSize/3,
                                    #minKMEtoStay = 0.5,          # Default 0.3
                                    #reassignThreshold = 0,       # Default 1e-6
                                    #mergeCutHeight = 0.2,        # Default 0.15
                                    #pamStage = pam_stage, 
                                    #pamRespectsDendro = TRUE,
                                    #replaceMissingAdjacencies = TRUE,
                                    nThreads = 10,
                                    numericLabels = TRUE,
                                    saveTOMs = FALSE,
                                    saveTOMFileBase = "HOST_TOM",
                                    verbose = 3,
                                    maxBlockSize=8000)
hubs <- chooseTopHubInEachModule(metatranscriptomics, modules.omics.MetaTrans$colors, power = st, omitColors = "0")
stage2results_MetaTrans <- list(modules = modules.omics.MetaTrans, 
                        hubs = hubs)
saveRDS(stage2results_MetaTrans, "/Users/shashankgupta/Desktop/ImprovAFish/github/ImprovaFish/OmicsIntegration/stage2results_MetaTranscriptomics.rds")
saveRDS(modules.omics.MetaTrans, "/Users/shashankgupta/Desktop/ImprovAFish/github/ImprovaFish/OmicsIntegration/modules.omics.MetaTrans.rds")
modules.omics.MetaTrans <- readRDS("/Users/shashankgupta/Desktop/ImprovAFish/github/ImprovaFish/OmicsIntegration/modules.omics.MetaTrans.rds")
stage2results_MetaTrans <- readRDS("/Users/shashankgupta/Desktop/ImprovAFish/github/ImprovaFish/OmicsIntegration/stage2results_MetaTranscriptomics.rds")

Convert labels to colors for plotting

merged_colors <- labels2colors(stage2results_MetaTrans$modules$colors)
n_modules <- unique(merged_colors) %>% length()

samples_good <- sum(stage2results_MetaTrans$modules$goodSamples) == length(stage2results_MetaTrans$modules$goodSamples)
genes_good <- sum(stage2results_MetaTrans$modules$goodGenes) == length(stage2results_MetaTrans$modules$goodGenes)

ME_good <- sum(stage2results_MetaTrans$modules$MEsOK) == length(stage2results_MetaTrans$modules$MEsOK)
table(stage2results_MetaTrans$modules$colors) %>% 
  as.data.frame() %>% 
  dplyr::rename(Module = Var1, Size = Freq) %>%
  dplyr::mutate(Module_color = labels2colors(as.numeric(as.character(Module)))) -> module_size

module_size %>% 
  ggplot(aes(x = Module, y = Size, fill = Module)) +
  geom_col(color =  "#000000") +
  ggtitle("Number of genes in each module") +
  theme(legend.position = "none") + 
  scale_fill_manual(values = setNames(module_size$Module_color,module_size$Module)) +
  geom_text(aes(label = Size),vjust = 0.5, hjust = -0.18, size = 3.5) +
  ylim(0, max(module_size$Size)*1.1) +
  theme(plot.margin = margin(2, 2, 2, 2, "pt")) +
  coord_flip()-> module_size_barplot

module_size_barplot + theme_bw()

table(stage2results_MetaTrans$modules$colors) %>% as.data.frame() -> res
res$`Module color` <- WGCNA::labels2colors(as.numeric(as.character(res$Var1)))
res <- res[, c(1,3,2)]
colnames(res) <- c("Module", "Module color", "Number of metatranscriptomics")
# Plot the dendrogram and the module colors underneath for each block
for(i in seq_along(stage2results_MetaTrans$modules$dendrograms)){
  plotDendroAndColors(stage2results_MetaTrans$modules$dendrograms[[i]], merged_colors[stage2results_MetaTrans$modules$blockGenes[[i]]],
                      "Module colors",
                      dendroLabels = FALSE, hang = 0.03,
                      addGuide = TRUE, guideHang = 0.05,
                      main = paste0("Cluster Dendrogram\n", 
                                    "for block ", 
                                    i,": ",
                                    length(stage2results_MetaTrans$modules$blockGenes[[i]]),
                                    " metatranscriptomics"))
}
MEs <- stage2results_MetaTrans$modules$MEs
# Module correlation to other modules
MEs_R <- bicor(MEs, MEs, maxPOutliers = 0.05)
idx.r <- which(rownames(MEs_R) == "ME0")
idx.c <- which(colnames(MEs_R) == "ME0")
MEs_R_noME0 <- MEs_R[-idx.r, -idx.c]

MEs_R_noME0[upper.tri(MEs_R_noME0)] %>% 
  as.data.frame() %>% 
  dplyr::rename("correlation" = ".") %>% 
  ggplot(aes(x=correlation)) + 
  geom_histogram(bins = 20) +
  #geom_density() + 
  xlim(-1, 1) +
  ggtitle(paste0(prefix,"ME correlation\n w/o ",prefix ,"ME0")) -> MEs_R_density

pheatmap::pheatmap(MEs_R_noME0, color = colorRampPalette(c("Blue", "White", "Red"))(100),
                   silent = T, 
                   breaks = seq(-1,1,length.out = 101),
                   treeheight_row = 5, 
                   treeheight_col = 5,
                   main = paste0(prefix,"ME correlation heatmap w/o ",prefix ,"ME0"),
                   labels_row = paste0(prefix, rownames(MEs_R)),
                   labels_col = paste0(prefix, colnames(MEs_R))) -> MEs_R_Corr

cowplot::plot_grid(MEs_R_density, MEs_R_Corr$gtable, labels = c("D", "E"), label_size = plot_labeling_size, rel_widths = c(0.6, 1)) -> density_eigen

all(rownames(metatranscriptomics) == rownames(MEs))
dim(metatranscriptomics) %>% paste0(c(" samples", " metatranscriptomics"))
kME <- bicor(metatranscriptomics, MEs, maxPOutliers = 0.05)
dim(kME) %>% paste0(c(" metatranscriptomics", " modules"))

intra_cor <- c()
for (i in 1:ncol(metatranscriptomics)) {
  m <- stage2results_MetaTrans$modules$colors[i]
  intra_cor[i] <- kME[i, paste0("ME", m)]
  if(m != 0){
    intra_cor[i] <- kME[i, paste0("ME", m)]
  } else{
    intra_cor[i] <- NA
  }
  
}

idx <- which(is.na(intra_cor))
intra_cor <- intra_cor[-idx]
# Corr within modules
corr_within_module <- function(metatranscriptomics, modules, module_x = 1){
  idx.metatranscriptomics <- which(modules$colors == module_x)
  idx.me <- which(colnames(modules$MEs) == paste0("ME",module_x))
  kME_x <- bicor(metatranscriptomics[,idx.metatranscriptomics], modules$MEs[,idx.me], maxPOutliers = 0.05)
  kME_x
}

ggplot.list <- list()

for(m in colnames(stage2results_MetaTrans$modules$MEs)){
  h <- as.numeric(sub("ME","", m))
  data.frame(x = suppressWarnings(corr_within_module(metatranscriptomics = metatranscriptomics, modules = stage2results_MetaTrans$modules, module_x = h))) %>% 
    ggplot() + 
    #geom_density(aes(x = x), fill = labels2colors(h), color = "black", alpha = 0.5) + 
    geom_histogram(aes(x), fill = labels2colors(h), color = "black", alpha = 0.5, bins = 20) + 
    xlim(-1, 1) +
    xlab("metatranscriptomics correlation")+
    ggtitle(paste0(prefix,m)) -> da_plot
  
  ggplot.list[[m]] <- da_plot
}

ggplot.list <- ggplot.list[ggplot.list %>% names() %>% sub("ME", "", .) %>% as.numeric() %>% order()]

cowplot::plot_grid(plotlist = ggplot.list, ncol = 6) -> density_all_plot
cowplot::plot_grid(si_mc_plot , density_eigen, ncol = 1, rel_heights = c(0.8,1)) -> part_1
cowplot::plot_grid(part_1, module_size_barplot, labels = c("", "C"), label_size = plot_labeling_size, rel_widths = c(1,0.5)) -> part_2
cowplot::plot_grid(part_2, density_all_plot, ncol = 1, rel_heights = c(0.8,1), labels = c("", "F"), label_size = plot_labeling_size)

Deseq2 analysis

dds_metatranscriptomics$group <- factor(paste0(dds_metatranscriptomics$TimePoint, dds_metatranscriptomics$Treatment))
design(dds_metatranscriptomics) <- ~ group-1
dds_metatranscriptomics <- DESeq(dds_metatranscriptomics, parallel = T)
resultsNames(dds_metatranscriptomics)
##  [1] "groupT1ctr" "groupT1mc1" "groupT1mc2" "groupT1mn3" "groupT2ctr"
##  [6] "groupT2mc1" "groupT2mc2" "groupT2mn3" "groupT3ctr" "groupT3mc1"
## [11] "groupT3mc2" "groupT3mn3"
annot_function_metatrans <- read_tsv("/Users/shashankgupta/Desktop/ImprovAFish/github/ImprovaFish/MetaMetaTranscriptomics/Total.DRAM.STx.bac.k2.total.annotations.tsv")
annot_function_metatrans <- annot_function_metatrans[, c("ID", "kegg_hit", "uniref_taxonomy","pfam_hits", "cazy_hits" )]

Time point (T1)

Diet 1
res <- results(dds_metatranscriptomics,
               contrast = list("groupT1mc1","groupT1ctr"))
res_tbl <- as_tibble(res, rownames = "ENSEMBL") %>%
  filter(padj <0.05)%>%
  arrange(padj) 
res_tbl <- merge(res_tbl, annot_function_metatrans, by.x = "ENSEMBL", by.y = "ID", all.x = TRUE )
# res_tbl %>% 
#   filter(log2FoldChange > 0) %>%
#   select(pfam_hits, uniref_taxonomy)
DT::datatable(res_tbl)
paste("Total number of upregulated transcripts is",length(res_tbl$log2FoldChange[res_tbl$log2FoldChange>0])[1],"and downregulated is",length(res_tbl$log2FoldChange[res_tbl$log2FoldChange<0])[1])
## [1] "Total number of upregulated transcripts is 10 and downregulated is 15"
Diet 2
res <- results(dds_metatranscriptomics,
               contrast = list("groupT1mc2","groupT1ctr"))
res_tbl <- as_tibble(res, rownames = "ENSEMBL") %>%
  filter(padj <0.05)%>%
  arrange(padj) 
res_tbl <- merge(res_tbl, annot_function_metatrans, by.x = "ENSEMBL", by.y = "ID", all.x = TRUE )
# res_tbl %>% 
#   filter(log2FoldChange > 0) %>%
#   select(pfam_hits, uniref_taxonomy)
DT::datatable(res_tbl)
paste("Total number of upregulated transcripts is",length(res_tbl$log2FoldChange[res_tbl$log2FoldChange>0])[1],"and downregulated is",length(res_tbl$log2FoldChange[res_tbl$log2FoldChange<0])[1])
## [1] "Total number of upregulated transcripts is 14 and downregulated is 9"
Diet 3
res <- results(dds_metatranscriptomics,
               contrast = list("groupT1mn3","groupT1ctr"))
res_tbl <- as_tibble(res, rownames = "ENSEMBL") %>%
  filter(padj <0.05)%>%
  arrange(padj) 
res_tbl <- merge(res_tbl, annot_function_metatrans, by.x = "ENSEMBL", by.y = "ID", all.x = TRUE )
# res_tbl %>% 
#   filter(log2FoldChange > 0) %>%
#   select(pfam_hits, uniref_taxonomy)
DT::datatable(res_tbl)
paste("Total number of upregulated transcripts is",length(res_tbl$log2FoldChange[res_tbl$log2FoldChange>0])[1],"and downregulated is",length(res_tbl$log2FoldChange[res_tbl$log2FoldChange<0])[1])
## [1] "Total number of upregulated transcripts is 11 and downregulated is 1"

Time point (T2)

Diet 1
res <- results(dds_metatranscriptomics,
               contrast = list("groupT2mc1","groupT2ctr"))
res_tbl <- as_tibble(res, rownames = "ENSEMBL") %>%
  filter(padj <0.05)%>%
  arrange(padj) 
res_tbl <- merge(res_tbl, annot_function_metatrans, by.x = "ENSEMBL", by.y = "ID", all.x = TRUE )
# res_tbl %>% 
#   filter(log2FoldChange > 0) %>%
#   select(pfam_hits, uniref_taxonomy)
DT::datatable(res_tbl)
paste("Total number of upregulated transcripts is",length(res_tbl$log2FoldChange[res_tbl$log2FoldChange>0])[1],"and downregulated is",length(res_tbl$log2FoldChange[res_tbl$log2FoldChange<0])[1])
## [1] "Total number of upregulated transcripts is 6 and downregulated is 2"
Diet 2
res <- results(dds_metatranscriptomics,
               contrast = list("groupT2mc2","groupT2ctr"))
res_tbl <- as_tibble(res, rownames = "ENSEMBL") %>%
  filter(padj <0.05)%>%
  arrange(padj) 

res_tbl <- merge(res_tbl, annot_function_metatrans, by.x = "ENSEMBL", by.y = "ID", all.x = TRUE )
# res_tbl %>% 
#   filter(log2FoldChange > 0) %>%
#   select(pfam_hits, uniref_taxonomy)
DT::datatable(res_tbl)
paste("Total number of upregulated transcripts is",length(res_tbl$log2FoldChange[res_tbl$log2FoldChange>0])[1],"and downregulated is",length(res_tbl$log2FoldChange[res_tbl$log2FoldChange<0])[1])
## [1] "Total number of upregulated transcripts is 32 and downregulated is 3"
Diet 3
res <- results(dds_metatranscriptomics,
               contrast = list("groupT2mn3","groupT2ctr"))
res_tbl <- as_tibble(res, rownames = "ENSEMBL") %>%
  filter(padj <0.05)%>%
  arrange(padj) 
res_tbl <- merge(res_tbl, annot_function_metatrans, by.x = "ENSEMBL", by.y = "ID", all.x = TRUE )
# res_tbl %>% 
#   filter(log2FoldChange > 0) %>%
#   select(pfam_hits, uniref_taxonomy)
DT::datatable(res_tbl)
paste("Total number of upregulated transcripts is",length(res_tbl$log2FoldChange[res_tbl$log2FoldChange>0])[1],"and downregulated is",length(res_tbl$log2FoldChange[res_tbl$log2FoldChange<0])[1])
## [1] "Total number of upregulated transcripts is 36 and downregulated is 0"

Time point (T3)

Diet 1
res <- results(dds_metatranscriptomics,
               contrast = list("groupT3mc1","groupT3ctr"))
res_tbl <- as_tibble(res, rownames = "ENSEMBL") %>%
  filter(padj <0.05)%>%
  arrange(padj) 
res_tbl <- merge(res_tbl, annot_function_metatrans, by.x = "ENSEMBL", by.y = "ID", all.x = TRUE )
# res_tbl %>% 
#   filter(log2FoldChange > 0) %>%
#   select(pfam_hits, uniref_taxonomy)
DT::datatable(res_tbl)
paste("Total number of upregulated transcripts is",length(res_tbl$log2FoldChange[res_tbl$log2FoldChange>0])[1],"and downregulated is",length(res_tbl$log2FoldChange[res_tbl$log2FoldChange<0])[1])
## [1] "Total number of upregulated transcripts is 10 and downregulated is 16"
Diet 2
res <- results(dds_metatranscriptomics,
               contrast = list("groupT3mc2","groupT3ctr"))
res_tbl <- as_tibble(res, rownames = "ENSEMBL") %>%
  filter(padj <0.05)%>%
  arrange(padj)
res_tbl <- merge(res_tbl, annot_function_metatrans, by.x = "ENSEMBL", by.y = "ID", all.x = TRUE )
# res_tbl %>% 
#   filter(log2FoldChange > 0) %>%
#   select(pfam_hits, uniref_taxonomy)
DT::datatable(res_tbl)
paste("Total number of upregulated transcripts is",length(res_tbl$log2FoldChange[res_tbl$log2FoldChange>0])[1],"and downregulated is",length(res_tbl$log2FoldChange[res_tbl$log2FoldChange<0])[1])
## [1] "Total number of upregulated transcripts is 11 and downregulated is 9"
Diet 3
res <- results(dds_metatranscriptomics,
               contrast = list("groupT3mn3","groupT3ctr"))
res_tbl <- as_tibble(res, rownames = "ENSEMBL") %>%
  filter(padj <0.05)%>%
  arrange(padj) 
res_tbl <- merge(res_tbl, annot_function_metatrans, by.x = "ENSEMBL", by.y = "ID", all.x = TRUE )
# res_tbl %>% 
#   filter(log2FoldChange > 0) %>%
#   select(pfam_hits, uniref_taxonomy)
DT::datatable(res_tbl)
paste("Total number of upregulated transcripts is",length(res_tbl$log2FoldChange[res_tbl$log2FoldChange>0])[1],"and downregulated is",length(res_tbl$log2FoldChange[res_tbl$log2FoldChange<0])[1])
## [1] "Total number of upregulated transcripts is 12 and downregulated is 11"
# Create the dataframe (loop)
df <- data.frame(Group=character(), a1=character(), a2=character(), a3=character())

# Loop through each group and store the results in the dataframe
for(treatment in c("T1", "T2", "T3")){
  for(group in c("mc1","mc2","mn3")){
    sample_name <- paste0("group",treatment,group)
    ctr_sample <- paste0("group",treatment,"ctr")
    
    # Calculate results
    res <- results(dds_metatranscriptomics, contrast = list(sample_name,ctr_sample))
    
    # Filter the results and count the number of positive elements
    res_tbl <- as_tibble(res, rownames = "ENSEMBL") %>%
      filter(padj <0.05)%>%
      arrange(padj)
    a <- length(res_tbl$log2FoldChange[res_tbl$log2FoldChange>0])
    
    # Store the results in the dataframe
    df <- rbind(df, c(sample_name,a))
  }
}
names(df) <- c("Groups", "DEGs")
DT::datatable(df)

Visualization of differenitally expressed genes

res <- results(dds_metatranscriptomics,contrast = list("groupT3mn3","groupT3ctr")) %>%
  as.data.frame %>%
  add_rownames(var = "Genes") %>%
  filter(padj < 0.05) %>%
  arrange(padj)

cat("DEGs: ", nrow(res))
vsd <- vst(dds_metatranscriptomics)
library(stringi)
p1 <- assay(vsd) %>%
  as.data.frame %>%
  add_rownames(var = "Genes") %>%
  filter(Genes %in% res$Genes[1:23]) %>%
  gather(Sample, Expression, -Genes) %>%
  mutate(Treatment = stri_replace_all_regex(Sample, colnames(dds_metatranscriptomics), dds_metatranscriptomics$Treatment, vectorize=FALSE)) %>%
  ggplot(aes(x = Treatment, y = Expression, color = Treatment)) +
  geom_boxplot() + 
  facet_grid(rows = vars(Genes)) + theme_bw()

p2 <- assay(vsd) %>%
  as.data.frame %>%
  add_rownames(var = "Genes") %>%
  filter(Genes %in% res$Genes[1:23]) %>%
  gather(Sample, Expression, -Genes) %>%
  mutate(TimePoint = stri_replace_all_regex(Sample, colnames(dds_metatranscriptomics), dds_metatranscriptomics$TimePoint, vectorize=FALSE)) %>%
  ggplot(aes(x = TimePoint, y = Expression, color = TimePoint)) +
  geom_boxplot() + 
  facet_grid(rows = vars(Genes)) + theme_bw()

cowplot::plot_grid(plotlist = list(p1,p2), ncol = 2)