Step 1. Metabolomics: RP

Load all the packages

library("MetaboAnalystR")
library("ggplot2")
library("DT")
library("WGCNA")
library("phyloseq")
library("microbiomeMarker")
library("dplyr")
library("microbial")
library("cowplot")
prefix <- "RP"
plot_labeling_size <- 15
parameter_sets <- list(set_1 = list(applied_norm = "TSS", applied_transf = "CLR", assoc_measure = "bicor"),
                       set_2 = list(applied_norm = "CSS", applied_transf = "log2", assoc_measure = "bicor"))

Preprocessing

mSet<-InitDataObjects("conc", "mf", FALSE)
mSet<-SetDesignType(mSet, "multi")
mSet<-Read.TextDataTs(mSet, "/Users/shashankgupta/Desktop/ImprovAFish/Metabolomics/Metabolomics/For_Heatmap_Merge_both_HILIC_RP/RP_df.csv", "colmf")
mSet<-ReadMetaData(mSet, "/Users/shashankgupta/Desktop/ImprovAFish/Metabolomics/Metabolomics/For_Heatmap_Merge_both_HILIC_RP/RP_metadata.csv")
mSet<-SanityCheckData(mSet)
mSet<-ReplaceMin(mSet)
mSet<-SanityCheckMeta(mSet, 1)
mSet<-SetDataTypeOfMeta(mSet)
mSet<-PreparePrenormData(mSet)
mSet<-Normalization(mSet, "MedianNorm", "SrNorm", "NULL", ratio=FALSE, ratioNum=20)
metabolites_RP <- data.frame(mSet$dataSet$norm)
metabolites_RP <- t(metabolites_RP)
#Look for outliers by examining tree of metabolites_RP  
dim(metabolites_RP) %>% paste(c("metabolites_RP", "Samples"))
sampleTree = hclust(dist(metabolites_RP), method = "average");
plot(sampleTree, main = "metabolites_RP clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2)
outlier<-c("N.Lauroylsarcosine...271.21434.Da.608.71.s")
metabolites_RP<- metabolites_RP %>%
  as.data.frame %>%
  filter(!row.names(metabolites_RP) %in% outlier)
sampleTree = hclust(dist(metabolites_RP), method = "average");
plot(sampleTree, main = "metabolites_RP clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2)
#Look for outlier samples by examining tree of samples
#Transpose such that samples are in rows and metabolites_RP are in columns.  
metabolites_RP <- t(metabolites_RP)
dim(metabolites_RP) %>% paste(c("Samples", "metabolites_RP"))
sampleTree = hclust(dist(metabolites_RP), method = "average");
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2)
# 
# outlier<-c("T3_mc2_tk3_VFA01_aq_P4.D4_1_3462", "T3_mc2_tk3_VFA03_aq_P4.D5_1_3463")
# metabolites_RP<- metabolites_RP %>%
#   as.data.frame %>%
# filter(!row.names(metabolites_RP) %in% outlier)

LefSE analysis

metabolites_RP<-t(metabolites_RP)
raw <- as.matrix(metabolites_RP)
OTU = otu_table(raw, taxa_are_rows = TRUE)
dat <- read.csv("/Users/shashankgupta/Desktop/ImprovAFish/Metabolomics/Metabolomics/For_Heatmap_Merge_both_HILIC_RP/RP_metadata.csv")
row.names(dat) <- dat$Sample
dat$New_Diet <- toupper(dat$Diet)

# Merge into one complete phyloseq object
ps <- merge_phyloseq(otu_table(OTU), sample_data(dat))
tt <- as.data.frame(row.names(metabolites_RP))
row.names(tt) <- tt$`row.names(metabolites_RP)`
colnames(tt)[1] <- "Kingdom"
tax_table(ps) <- as.matrix(tt)
set.seed(12345)

Time point 2

Diet 1

ps_MC1<-subset_samples(ps, Time %in% c("T2") & New_Diet %in% c("CTR", "MC1"))
ps_MC1 <- prune_taxa(taxa_sums(ps_MC1) >0, ps_MC1)
lef_out<-run_lefse(ps_MC1, group = "New_Diet", norm = "CPM", 
                   kw_cutoff = 0.05, lda_cutoff = 0, taxa_rank = "none")

plot_ef_bar(lef_out)

DT::datatable(data.frame(marker_table(lef_out)) %>%
  #filter(enrich_group != "CTR") %>%
  #select(feature) %>%
  `rownames<-`( NULL ))

Diet 2

ps_MC2<-subset_samples(ps, Time %in% c("T2") & New_Diet %in% c("CTR", "MC2"))
ps_MC2 <- prune_taxa(taxa_sums(ps_MC2) >0, ps_MC2)
lef_out<-run_lefse(ps_MC2, group = "New_Diet", norm = "CPM", 
                   kw_cutoff = 0.05, lda_cutoff = 1.75, taxa_rank = "none")

plot_ef_bar(lef_out)

DT::datatable(data.frame(marker_table(lef_out)) %>%
  #filter(enrich_group != "CTR") %>%
  #select(feature) %>%
  `rownames<-`( NULL ))

Diet 3

ps_MN3<-subset_samples(ps, Time %in% c("T2") & New_Diet %in% c("CTR", "MN3"))
ps_MN3 <- prune_taxa(taxa_sums(ps_MN3) >0, ps_MN3)
table(sample_data(ps_MN3)$New_Diet)
## 
## CTR MN3 
##  12  12
lef_out<-run_lefse(ps_MN3, group = "New_Diet", norm = "CPM", 
                   kw_cutoff = 0.05, lda_cutoff = 1.75, taxa_rank = "none")

plot_ef_bar(lef_out)

DT::datatable(data.frame(marker_table(lef_out)) %>%
  #filter(enrich_group != "CTR") %>%
  #select(feature) %>%
  `rownames<-`( NULL ))

Time point 3

Diet 1

ps_MC1<-subset_samples(ps, Time %in% c("T3") & New_Diet %in% c("CTR", "MC1"))
ps_MC1 <- prune_taxa(taxa_sums(ps_MC1) >0, ps_MC1)
lef_out<-run_lefse(ps_MC1, group = "New_Diet", norm = "CPM", 
                   kw_cutoff = 0.05, lda_cutoff = 0, taxa_rank = "none")

plot_ef_bar(lef_out)

DT::datatable(data.frame(marker_table(lef_out)) %>%
  #filter(enrich_group != "CTR") %>%
  #select(feature) %>%
  `rownames<-`( NULL ))

Diet 2

ps_MC2<-subset_samples(ps, Time %in% c("T3") & New_Diet %in% c("CTR", "MC2"))
ps_MC2 <- prune_taxa(taxa_sums(ps_MC2) >0, ps_MC2)
lef_out<-run_lefse(ps_MC2, group = "New_Diet", norm = "CPM", 
                   kw_cutoff = 0.05, lda_cutoff = 2, taxa_rank = "none")

plot_ef_bar(lef_out)

DT::datatable(data.frame(marker_table(lef_out)) %>%
  #filter(enrich_group != "CTR") %>%
  #select(feature) %>%
  `rownames<-`( NULL ))

Diet 3

ps_MN3<-subset_samples(ps, Time %in% c("T3") & New_Diet %in% c("CTR", "MN3"))
ps_MN3 <- prune_taxa(taxa_sums(ps_MN3) >0, ps_MN3)
lef_out<-run_lefse(ps_MN3, group = "New_Diet", norm = "CPM", 
                   kw_cutoff = 0.05, lda_cutoff = 2, taxa_rank = "none")

plot_ef_bar(lef_out)

DT::datatable(data.frame(marker_table(lef_out)) %>%
  #filter(enrich_group != "CTR") %>%
  #select(feature) %>%
  `rownames<-`( NULL ))
#loop
output_table <- data.frame()

for (i in c("T2", "T3")){
  for (j in c("MC1","MC2", "MN3")){
    phy<-subset_samples(ps, Time %in% c(i) & New_Diet %in% c("CTR", j))
    phy <- prune_taxa(taxa_sums(phy) >0, phy)
    lef_out<-run_lefse(phy,group = "New_Diet", norm = "CPM", 
                       kw_cutoff = 0.05, lda_cutoff = 2, taxa_rank = "none")
    assign(paste0("HILIC_metabolomics", i, j), plot_abundance(lef_out, group = "New_Diet"))
    output_table[i,j]<-sum(marker_table(lef_out)$enrich_group == j)
  }
}
#colnames(output_table)<-c("MC1", "MC2", "MN3")
#rownames(output_table) <- c("T1","T2","T3")
t(output_table)

plot_grid(HILIC_metabolomicsT2MC1, HILIC_metabolomicsT2MC2, HILIC_metabolomicsT2MN3, 
          HILIC_metabolomicsT3MC1, HILIC_metabolomicsT3MC2, HILIC_metabolomicsT3MN3, 
          labels = "AUTO", ncol = 1, align = "hv", rel_heights = c(0.3,0.3,1.2,2.5,2.8,2.8))
# portrait 25 *10
#plot_grid(poteomicsT3MC1, poteomicsT3MC2, poteomicsT3MN3, labels = "AUTO", ncol = 1, align = "hv")

Step2. Host-microbiome interactions

WGCNA analysis

#Transpose such that samples are in rows and metabolites_RP are in columns.  
metabolites_RP <- t(metabolites_RP)
dim(metabolites_RP) %>% paste(c("Samples", "metabolites_1"))
powers <- c(1:10, seq(12,30,2))
sft <- pickSoftThreshold(metabolites_RP, 
                         powerVector = powers, 
                         verbose = 1, 
                         networkType = "signed",
                         corFn= "bicor")

idx <- min(which((-sign(sft$fitIndices[,3])*sft$fitIndices[,2]) > 0.90))
if(is.infinite(idx)){
  idx <- min(which((-sign(sft$fitIndices[,3])*sft$fitIndices[,2]) > 0.80))
  if(!is.infinite(idx)){
    st <- sft$fitIndices[idx,1]
  } else{
    idx <- which.max(-sign(sft$fitIndices[,3])*sft$fitIndices[,2])
    st <- sft$fitIndices[idx,1]
  }
} else{
  st <- sft$fitIndices[idx,1]
}

data.frame(Indices = sft$fitIndices[,1],
           sfApprox = -sign(sft$fitIndices[,3])*sft$fitIndices[,2]) %>% 
  ggplot() + 
  geom_hline(yintercept = 0.9, color = "red", alpha = 0.6) + # corresponds to R^2 cut-off of 0.9
  geom_hline(yintercept = 0.8, color = "red", alpha = 0.2) + # corresponds to R^2 cut-off of 0.8
  geom_line(aes(x = Indices, y = sfApprox), color = "red", alpha = 0.1, size = 2.5) +
  geom_text(mapping = aes(x = Indices, y = sfApprox, label = Indices), color = "red", size = 4) +
  ggtitle("Scale independence") +
  xlab("Soft Threshold (power)") +
  ylab("SF Model Fit,signed R^2") +
  xlim(1,30) + theme_bw() +
  ylim(-1,1) +
  geom_segment(aes(x = st, y = 0.25, xend = st, yend = sfApprox[idx]-0.05), 
               arrow = arrow(length = unit(0.2,"cm")), 
               size = 0.5)-> scale_independence_plot 



data.frame(Indices = sft$fitIndices[,1],
           meanApprox = sft$fitIndices[,5]) %>% 
  ggplot() + 
  geom_line(aes(x = Indices, y = meanApprox), color = "red", alpha = 0.1, size = 2.5) +
  geom_text(mapping = aes(x = Indices, y = meanApprox, label = Indices), color = "red", size = 4) +
  xlab("Soft Threshold (power)") +
  ylab("Mean Connectivity") +theme_bw() +
  geom_segment(aes(x = st-0.4, 
                   y = sft$fitIndices$mean.k.[idx], 
                   xend = 0, 
                   yend = sft$fitIndices$mean.k.[idx]),
               arrow = arrow(length = unit(0.2,"cm")), 
               size = 0.4) +
  ggtitle(paste0("Mean connectivity: ", 
                 round(sft$fitIndices$mean.k.[idx],2))) -> mean_connectivity_plot


cowplot::plot_grid(scale_independence_plot, mean_connectivity_plot, ncol = 2, align = "h", labels = c("A", "B"), label_size = plot_labeling_size) -> si_mc_plot

si_mc_plot

modules.omics.RP <- blockwiseModules(metabolites_RP,
                                    power = st, 
                                    networkType = "signed", 
                                    TOMType = "signed",
                                    corType = "bicor",
                                    #maxPOutliers = 0.05,
                                    #deepSplit = 4, # Default 2
                                    minModuleSize = 12, # 30
                                    #minCoreKME = 0.5,      # Default 0.5
                                    #minCoreKMESize = 2,    # Default minModuleSize/3,
                                    #minKMEtoStay = 0.5,    # Default 0.3
                                    #reassignThreshold = 0, # Default 1e-6
                                    #mergeCutHeight = 0.4,  # Default 0.15
                                    #pamStage = pam_stage, 
                                    #pamRespectsDendro = TRUE,
                                    #replaceMissingAdjacencies = TRUE,
                                    numericLabels = TRUE,
                                    saveTOMs = FALSE,
                                    saveTOMFileBase = "TOM",
                                    verbose = 3,
                                    maxBlockSize=8000, nThreads = 10)


hubs_M <- chooseTopHubInEachModule(metabolites_RP, modules.omics.RP$colors, power = st, omitColors = "0")

stage2results_RP <- list(modules = modules.omics.RP, 
                        hubs = hubs_M)

# Save result for omics integration
saveRDS(stage2results_RP, "/Users/shashankgupta/Desktop/ImprovAFish/github/ImprovaFish/OmicsIntegration/stage2results_RP.rds")
# Convert labels to colors for plotting
merged_colors <- labels2colors(stage2results_RP$modules$colors)
n_modules <- unique(merged_colors) %>% length()

samples_good <- sum(stage2results_RP$modules$goodSamples) == length(stage2results_RP$modules$goodSamples)
genes_good <- sum(stage2results_RP$modules$goodGenes) == length(stage2results_RP$modules$goodGenes)

ME_good <- sum(stage2results_RP$modules$MEsOK) == length(stage2results_RP$modules$MEsOK)
table(stage2results_RP$modules$colors) %>% 
  as.data.frame() %>% 
  dplyr::rename(Module = Var1, Size = Freq) %>%
  dplyr::mutate(Module_color = labels2colors(as.numeric(as.character(Module)))) -> module_size

module_size %>% 
  ggplot(aes(x = Module, y = Size, fill = Module)) +
  geom_col(color =  "#000000") +
  ggtitle("Number of genes in each module") +
  theme(legend.position = "none") + 
  theme_bw()+
  scale_fill_manual(values = setNames(module_size$Module_color,module_size$Module)) +
  geom_text(aes(label = Size),vjust = 0.5, hjust = -0.18, size = 3.5) +
  ylim(0, max(module_size$Size)*1.1) + theme_bw() +
  theme(plot.margin = margin(2, 2, 2, 2, "pt")) +
  coord_flip()-> module_size_barplot

module_size_barplot

table(stage2results_RP$modules$colors) %>% as.data.frame() -> res
res$`Module color` <- WGCNA::labels2colors(as.numeric(as.character(res$Var1)))
res <- res[, c(1,3,2)]
colnames(res) <- c("Module", "Module color", "Number of metabolites_RP")
# Plot the dendrogram and the module colors underneath for each block
for(i in seq_along(stage2results_RP$modules$dendrograms)){
  plotDendroAndColors(stage2results_RP$modules$dendrograms[[i]], merged_colors[stage2results_RP$modules$blockGenes[[i]]],
                      "Module colors",
                      dendroLabels = FALSE, hang = 0.03,
                      addGuide = TRUE, guideHang = 0.05,
                      main = paste0("Cluster Dendrogram\n", 
                                    "for block ", 
                                    i,": ",
                                    length(stage2results_RP$modules$blockGenes[[i]]),
                                    " metabolites_RP"))
}
MEs <- stage2results_RP$modules$MEs

# Module correlation to other modules
MEs_R <- bicor(MEs, MEs, maxPOutliers = 0.05)

idx.r <- which(rownames(MEs_R) == "ME0")
idx.c <- which(colnames(MEs_R) == "ME0")

MEs_R_noME0 <- MEs_R[-idx.r, -idx.c]


MEs_R_noME0[upper.tri(MEs_R_noME0)] %>% 
  as.data.frame() %>% 
  dplyr::rename("correlation" = ".") %>% 
  ggplot(aes(x=correlation)) + 
  geom_histogram(bins = 20) +
  theme_bw() +
  #geom_density() + 
  xlim(-1, 1) +
  ggtitle(paste0(prefix,"ME correlation\n w/o ",prefix ,"ME0")) -> MEs_R_density

pheatmap::pheatmap(MEs_R_noME0, color = colorRampPalette(c("Blue", "White", "Red"))(100),
                   silent = T, 
                   breaks = seq(-1,1,length.out = 101),
                   treeheight_row = 5, 
                   treeheight_col = 5,
                   main = paste0(prefix,"ME correlation heatmap w/o ",prefix ,"ME0"),
                   labels_row = paste0(prefix, rownames(MEs_R)),
                   labels_col = paste0(prefix, colnames(MEs_R))) -> MEs_R_Corr

cowplot::plot_grid(MEs_R_density, MEs_R_Corr$gtable, labels = c("D", "E"), label_size = plot_labeling_size, rel_widths = c(0.6, 1)) -> density_eigen
all(rownames(metabolites_RP) == rownames(MEs))
## [1] TRUE
dim(metabolites_RP) %>% paste0(c(" samples", " metabolites_RP"))
## [1] "95 samples"         "129 metabolites_RP"
kME <- bicor(metabolites_RP, MEs, maxPOutliers = 0.05)
dim(kME) %>% paste0(c(" metabolites_RP", " modules"))
## [1] "129 metabolites_RP" "4 modules"
intra_cor <- c()
for (i in 1:ncol(metabolites_RP)) {
  m <- stage2results_RP$modules$colors[i]
  intra_cor[i] <- kME[i, paste0("ME", m)]
  if(m != 0){
    intra_cor[i] <- kME[i, paste0("ME", m)]
  } else{
    intra_cor[i] <- NA
  }
  
}

idx <- which(is.na(intra_cor))
intra_cor <- intra_cor[-idx]

plot(density(intra_cor), main = "Correlations with module-eigenmetabolites_HILIC (within module correlation)\nNo ME0", xlim = c(-1,1))
# Corr within modules
corr_within_module <- function(metabolites_RP, modules, module_x = 1){
  idx.metabolites_RP <- which(modules$colors == module_x)
  idx.me <- which(colnames(modules$MEs) == paste0("ME",module_x))
  kME_x <- bicor(metabolites_RP[,idx.metabolites_RP], modules$MEs[,idx.me], maxPOutliers = 0.05)
  kME_x
}

ggplot.list <- list()

for(m in colnames(stage2results_RP$modules$MEs)){
  h <- as.numeric(sub("ME","", m))
  data.frame(x = suppressWarnings(corr_within_module(metabolites_RP = metabolites_RP, modules = stage2results_RP$modules, module_x = h))) %>% 
    ggplot() + 
    #geom_density(aes(x = x), fill = labels2colors(h), color = "black", alpha = 0.5) + 
    geom_histogram(aes(x), fill = labels2colors(h), color = "black", alpha = 0.5, bins = 20) + 
    xlim(-1, 1) +
    theme_bw()+
    xlab("metabolites_RP correlation")+
    ggtitle(paste0(prefix,m)) -> da_plot
  
  ggplot.list[[m]] <- da_plot
}

ggplot.list <- ggplot.list[ggplot.list %>% names() %>% sub("ME", "", .) %>% as.numeric() %>% order()]

cowplot::plot_grid(plotlist = ggplot.list, ncol = 6) -> density_all_plot
# comine to one plot
cowplot::plot_grid(si_mc_plot , density_eigen, ncol = 1, rel_heights = c(0.8,1)) -> part_1
cowplot::plot_grid(part_1, module_size_barplot, labels = c("", "C"), label_size = plot_labeling_size, rel_widths = c(1,0.5)) -> part_2
cowplot::plot_grid(part_2, density_all_plot, ncol = 1, rel_heights = c(0.8,1), labels = c("", "F"), label_size = plot_labeling_size)