Load all the packages

library("ranacapa")
library("phyloseq")
library("ggplot2")
library("stringr")
library("plyr")
library("reshape2")
library("reshape")
library("dplyr")
library("tidyr")
library("doBy")
library("plyr")
library("microbiome")
library("ggpubr")
library("vegan")
library("tidyverse")
library("magrittr")
library("cowplot")
library("dendextend")
library("WGCNA")
library("metagenomeSeq")
library("decontam")
library("RColorBrewer")
library("ampvis2")
library("ggpubr")
library("formatR")
library("DT")
library("taxcleanr") #devtools::install_github("shashank-KU/taxcleanr")
library("pheatmap")
library("DESeq2")
library("microbiomeMarker")

1 Overview of the multi-omics

2 Sampling for low inclusion of beta-mannnan trial

A total of 30 salmon were sampled at the first time point (T0), followed by the sampling of 120 salmon individuals (30 for each diet) at each subsequent time point (T1 – 5 weeks after the trial start; T2 – 10 weeks after the trial start; T3 – 15 weeks after the trial start). The distal gut section was dissected using sterile scalpels and tweezers, and approximately 100 mg of gut content was collected from each fish using inoculation loops.

3 Phenotypic scoring

annotation_colors <- list(
  Timepoint = c(`T0`="gray" ,`T1` = "#FF7F00", `T2` = "#FFD92F", `T3`="#F781BF"),
  Diet = c(`ext-ctrl`="#FDC086", `CTR`="#e41a1cff", `MC1`="#377EB8", `MC2`="#4DAF4A", `MN3`="#984EA3"))

# Specify the path to your CSV file on GitHub
csv_file <- "Phenotypic/Metrics_low_dosage.csv"
csv_url <- paste0("https://raw.githubusercontent.com/shashank-KU/ImprovaFish-MDF-Effects/main/", csv_file)

# Read the CSV file
fish_data <- read.csv(url(csv_url), row.names = 1)


fish_data$Diet <- factor(fish_data$Diet, levels=c('start', 'CTR', 'MC1', 'MC2', 'MN3'))

plot2 <- ggboxplot(fish_data, x = "Diet", y = "BW",
          color = "black", palette = "jco", legend = "none", outlier.shape = NA)+ 
  #stat_compare_means(comparisons = my_comparisons, hide.ns = F) +
  #stat_compare_means() +
  ggplot2::geom_jitter(
    mapping = aes_string(fill="Diet"),
    position = ggplot2::position_jitter(seed=123), 
    shape = 21,
    color = "black",
    size = 2,
    alpha = 0.8
  ) +
  theme_bw() +  ylab("Body weigth") +
  theme(legend.position="none",axis.title.x=element_blank()) +
  scale_fill_manual(values =  annotation_colors$Diet) + 
  scale_colour_manual( values = annotation_colors$Diet) +
 facet_grid(. ~ Timepoint, scales="free_x", space = "free_x") 
# fish_data %>%
#     complete(Timepoint, Diet, fill = list(BW = NA, Length = NA)) %>%
#     group_by(Timepoint, Diet) %>%
#     summarise(across(c(Heart_Score, Fat_Score,cataract_V,cataract_H  ), mean, na.rm = TRUE), .groups = 'drop') %>%
#     mutate(across(where(is.numeric), round, 2)) %>%
#     DT::datatable()

library(dplyr)
library(tidyr)

fish_data %>%
  complete(Timepoint, Diet, fill = list(BW = NA, Length = NA, GW = NA, CF = NA, HSI = NA, CSI = NA)) %>%
  group_by(Timepoint, Diet) %>%
  summarise(
    Mean_BW = mean(BW, na.rm = TRUE),
    Mean_Length = mean(Length, na.rm = TRUE),
    Mean_GW = mean(GW, na.rm = TRUE),
    Mean_CF = mean(CF, na.rm = TRUE),
    Mean_HSI = mean(HSI, na.rm = TRUE),
    Mean_CSI = mean(CSI, na.rm = TRUE),
    Mean_Heart_W = mean(Heart_W, na.rm = TRUE),
    Mean_Liver_W = mean(Liver_W, na.rm = TRUE),
    SEM_BW = sd(BW, na.rm = TRUE) / sqrt(sum(!is.na(BW))),
    SEM_Length = sd(Length, na.rm = TRUE) / sqrt(sum(!is.na(Length))),
    SEM_GW = sd(GW, na.rm = TRUE) / sqrt(sum(!is.na(GW))),
    SEM_CF = sd(CF, na.rm = TRUE) / sqrt(sum(!is.na(CF))),
    SEM_HSI = sd(HSI, na.rm = TRUE) / sqrt(sum(!is.na(HSI))),
    SEM_CSI = sd(CSI, na.rm = TRUE) / sqrt(sum(!is.na(CSI))),
    SEM_Heart_W = sd(Heart_W, na.rm = TRUE) / sqrt(sum(!is.na(Heart_W))),
    SEM_Liver_W = sd(Liver_W, na.rm = TRUE) / sqrt(sum(!is.na(Liver_W))),

    .groups = 'drop'
  ) %>%
  mutate(
    across(where(is.numeric), ~round(., 2)),
    across(starts_with("Mean_"), ~paste(., "±", get(paste0("SEM_", sub("Mean_", "", cur_column())))))
  ) %>%
  filter(complete.cases(.)) %>%
  select(Timepoint, Diet, starts_with("Mean_"), -starts_with("SEM_")) %>%
  rename_with(~sub("Mean_", "", .), starts_with("Mean_")) %>% 
  t() %>%
  DT::datatable() 

4 Metagenomics

Import data and clean the taxonomy

base_url <- "https://raw.githubusercontent.com/shashank-KU/ImprovaFish-MDF-Effects/main/"
# Specify the path to your RDS file on GitHub
rds_file <- "Metagenomics/low_dosage_trial/metagenomics_low_MDF.rdata"
rds_url <- paste0(base_url, rds_file)
# Read the RDS file
all <- readRDS(url(rds_url))
tax <- data.frame(tax_table(all), stringsAsFactors = FALSE)
tax <- tax[,1:7] # No info in col 8-15
# Set informative colnames
colnames(tax) <- c("Kingdom", "Phylum","Class","Order","Family","Genus", "Species")
tax.clean <- data.frame(row.names = row.names(tax),
                        Kingdom = str_replace(tax[,1], "d__",""), 
                        Phylum = str_replace(tax[,2], "p__",""),
                        Class = str_replace(tax[,3], "c__",""),
                        Order = str_replace(tax[,4], "o__",""),
                        Family = str_replace(tax[,5], "f__",""),
                        Genus = str_replace(tax[,6], "g__",""),
                        Species = str_replace(tax[,7], "s__",""), 
                        stringsAsFactors = FALSE)
tax.clean[is.na(tax.clean)] <- ""
# - Clean rank by rank
# Kingdom - Remove the unassigned completely
# Phylum
# Class
# Remove extra info about origin from some bacteria
# Remove all fields that contain "uncultured", "Unknown" or "Ambigious"
bad <- c("Ambiguous_taxa","uncultured", "Subgroup_21")
tax.clean[tax.clean$Class %in% bad,3:7] <- ""

# Order
bad <- c("0319-6G20","1-20","11-24", "ADurb.Bin180","D8A-2", "Group_1.1c", "JGI_0000069-P22","Marine_Group_II",
         "Pla3_lineage","Run-SP154",
         "Ambiguous_taxa", "uncultured", "UBA10353_marine_group",
         "Subgroup_17", "SAR86_clade", "SAR11_clade", "SAR202_clade")
tax.clean[tax.clean$Order %in% bad,4:7] <- ""

# Family
bad <- c("Ambiguous_taxa","11-24","67-14", "uncultured", "SAR116_clade", "Run-SP154", 
         "Marine_Group_II", "env.OPS_17", "SAR116_clade", "S085", "S-70", "NS9_marine_group", "Mitochondria")
tax.clean[tax.clean$Family %in% bad,5:7] <- ""

# Genus
bad <- c("Ambiguous_taxa","Unknown_Family","uncultured","Subgroup_10", "1174-901-12", "67-14")
tax.clean[tax.clean$Genus %in% bad,6:7] <- ""

# Species
bad <- c("Ambiguous_taxa","marine_metagenome","low_GC","wastewater_metagenome","unidentified", 
         "uncultured_synthetic", "uncultured_organism")
tax.clean[tax.clean$Species %in% bad,6:7] <- ""

#tax.clean[grepl("uncultured", tax.clean$Species),"Species"] <- ""
#tax.clean[grepl("unidentified", tax.clean$Species),"Species"] <- ""

# Remove remove ".", change "-" and " " to "_"
for (i in 1:ncol(tax.clean)){
  tax.clean[,i] <- str_replace_all(tax.clean[,i], "[.]","")
  tax.clean[,i] <- str_replace_all(tax.clean[,i], "[(]","")
  tax.clean[,i] <- str_replace_all(tax.clean[,i], "[)]","")
  tax.clean[,i] <- str_replace_all(tax.clean[,i], "-","_")
  tax.clean[,i] <- str_replace_all(tax.clean[,i], " ","_")
}

for (i in 1:7){ tax.clean[,i] <- as.character(tax.clean[,i])}
# File holes in the tax table
for (i in 1:nrow(tax.clean)){
  #  Fill in missing taxonomy
  if (tax.clean[i,2] == ""){
    kingdom <- paste("Kingdom_", tax.clean[i,1], sep = "")
    tax.clean[i, 2:7] <- kingdom
  } else if (tax.clean[i,3] == ""){
    phylum <- paste("Phylum_", tax.clean[i,2], sep = "")
    tax.clean[i, 3:7] <- phylum
  } else if (tax.clean[i,4] == ""){
    class <- paste("Class_", tax.clean[i,3], sep = "")
    tax.clean[i, 4:7] <- class
  } else if (tax.clean[i,5] == ""){
    order <- paste("Order_", tax.clean[i,4], sep = "")
    tax.clean[i, 5:7] <- order
  } else if (tax.clean[i,6] == ""){
    family <- paste("Family_", tax.clean[i,5], sep = "")
    tax.clean[i, 6:7] <- family
  } else if (tax.clean[i,7] == ""){
    tax.clean$Species[i] <- paste("Genus_",tax.clean$Genus[i], sep = "_")
  }
}

rm(bad, class, family, i, kingdom,new,order,phylum,uncul)

tax_table(all) <- as.matrix(tax.clean)
all
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 7521 taxa and 168 samples ]
## sample_data() Sample Data:       [ 168 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 7521 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 7521 tips and 7465 internal nodes ]
## refseq()      DNAStringSet:      [ 7521 reference sequences ]
all.clean <- subset_taxa(all,
  Kingdom != "Unassigned" &
  Kingdom != "Archaea" &
  Kingdom != "Eukaryota" &
  Family != "Mitochondria" &
  Order != "Chloroplast" &
  Genus != "Ralstonia"
) %>%
prune_taxa(taxa_sums(.) > 0, .)
all.clean
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 6865 taxa and 168 samples ]
## sample_data() Sample Data:       [ 168 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 6865 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 6865 tips and 6817 internal nodes ]
## refseq()      DNAStringSet:      [ 6865 reference sequences ]

Rarefaction plot

sample_data(all.clean)$New_Diet <- factor(sample_data(all.clean)$New_Diet, levels=c('ext-ctrl', 'CTR', 'MC1', 'MC2', 'MN3'))

annotation_colors <- list(
  samplingTime = c(`T0`="gray" ,`T1` = "#FF7F00", `T2` = "#FFD92F", `T3`="#F781BF"),
  New_Diet = c(`ext-ctrl`="#FDC086", `CTR`="#e41a1cff", `MC1`="#377EB8", `MC2`="#4DAF4A", `MN3`="#984EA3"))

p <- ggrare(all.clean, step = 1000, 
            color = "samplingTime", 
            se = F,
            parallel = TRUE,
            plot = FALSE)
cols  <- c(brewer.pal(8,"Set1"), brewer.pal(7,"Dark2"),brewer.pal(7,"Set2"),brewer.pal(12,"Set3"),brewer.pal(7,"Accent"),brewer.pal(12,"Paired"),"gray")

p <- p + theme_bw() + 
  scale_fill_manual(values =annotation_colors$samplingTime) +
  scale_colour_manual( values = annotation_colors$samplingTime) +
  facet_wrap(~New_Diet) +
    guides(color = guide_legend(title = "Life stage"))

Alpha and Beta diversity

shannon.div <- estimate_richness(all.clean, measures = c("Shannon", "Observed"))
sampledata1<- data.frame(sample_data(all.clean))
row.names(shannon.div) <- gsub("[.]","-", row.names(shannon.div))
sampleData <- merge(sampledata1, shannon.div, by = 0 , all = TRUE)

my_comparisons <- list( c("ext-ctrl", "MC1"), c("ext-ctrl", "MC2"), c("ext-ctrl", "MN3"), c("CTR", "MC1"), c("CTR", "MC2"), c("CTR", "MN3"))


p1 <- ggboxplot(sampleData, x = "New_Diet", y = "Observed",
          color = "black", palette = "jco", legend = "none", outlier.shape = NA)+ 
  stat_compare_means(comparisons = my_comparisons) +
  stat_compare_means(label.y = 350) +
  ggplot2::geom_jitter(
    mapping = aes_string(fill="New_Diet"),
    position = ggplot2::position_jitter(seed=123), 
    shape = 21,
    color = "black",
    size = 2,
    alpha = 0.8
  ) +
  theme_bw() +  
  theme(legend.position="none",axis.title.x=element_blank()) +
  scale_fill_manual(values =  annotation_colors$New_Diet) + 
  scale_colour_manual( values = annotation_colors$New_Diet)   + 
  labs(y= "Observed richness (ASVs)") 

set.seed(1)
PCoA_bray <- ordinate(physeq = all.clean, method = "PCoA", distance = "bray")
PCoA_bray_plot<- plot_ordination(
  physeq = all.clean, 
  ordination = PCoA_bray, 
  color = "New_Diet"
) + 
  geom_point(aes(fill ="New_Diet" ),  size =2) + 
  geom_point(shape = 1, size = 2,colour = "black") +
  theme_bw() + 
  xlab("PCoA 1 [18.2 %]") + ylab("PCoA 2 [8.6 %]") + 
  stat_ellipse() + 
  scale_fill_manual(values =  annotation_colors$New_Diet) + 
  scale_colour_manual( values = annotation_colors$New_Diet) +
  guides(color = guide_legend(title = "Diet")) 



bottom_row <- plot_grid(p1, PCoA_bray_plot, labels = c('B', 'C'), align = 'h', rel_widths = c(1, 1.3))

Supplementary Fig. S4

plot_grid(p, bottom_row, labels = c('A', ''), ncol = 1, rel_heights = c(1, 1.2))

Computes the Bray-Curtis distance between all the samples in the dataset “all.clean” and tests for differences between samples in the variable “New_Diet” using the Adonis2 function. The Adonis2 function was run with 9999 permutations.

sampledf <- data.frame(sample_data(all.clean))
bcdist <- phyloseq::distance(all.clean, method="bray",normalized=TRUE) 
adonis2(bcdist ~ New_Diet, 
        data = sampledf, permutations = 9999)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = bcdist ~ New_Diet, data = sampledf, permutations = 9999)
##           Df SumOfSqs      R2      F Pr(>F)    
## New_Diet   4    3.395 0.06681 2.9174 0.0001 ***
## Residual 163   47.417 0.93319                  
## Total    167   50.812 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Contamination removal

df <- as.data.frame(sample_data(all.clean)) # Put sample_data into a ggplot-friendly data.frame
df$Sample_or_Control <- ifelse( df$New_Diet  %in% c("ext-ctrl"), "Control_Sample", "True_Sample")
sample_data(all.clean) <- df
df$LibrarySize <- sample_sums(all.clean)
df <- df[order(df$LibrarySize),]
df$Index <- seq(nrow(df))
sample_data(all.clean)$is.neg <- sample_data(all.clean)$Sample_or_Control == "Control_Sample"
contamdf.prev05 <- isContaminant(all.clean, method="prevalence", neg="is.neg", threshold=0.5)
paste("The number of contamination found is",table(contamdf.prev05$contaminant)[2])
## [1] "The number of contamination found is 86"
all.noncontam <- prune_taxa(!contamdf.prev05$contaminant, all.clean)

psdata <- subset_samples(all.noncontam, Sample_or_Control=="True_Sample")
psdata <- prune_taxa(taxa_sums(psdata) > 0, psdata)
psdata
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 6468 taxa and 156 samples ]
## sample_data() Sample Data:       [ 156 samples by 14 sample variables ]
## tax_table()   Taxonomy Table:    [ 6468 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 6468 tips and 6426 internal nodes ]
## refseq()      DNAStringSet:      [ 6468 reference sequences ]

Inspect the number of reads per sample and compare to rarefaction curves

psdata.p <- prune_taxa(taxa_sums(psdata) >0, psdata)
psdata.p
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 6468 taxa and 156 samples ]
## sample_data() Sample Data:       [ 156 samples by 14 sample variables ]
## tax_table()   Taxonomy Table:    [ 6468 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 6468 tips and 6426 internal nodes ]
## refseq()      DNAStringSet:      [ 6468 reference sequences ]

Taxonomic classification- Phylum level taxonomic distribution. Bars report the mean abundance for each individual sample.

psdata.r<- transform_sample_counts(psdata.p, function(x) x / sum(x) )
Final.RNA <- aggregate_rare(psdata.r, level = "Phylum", detection = 1/100, prevalence = 20/100)
getPalette = colorRampPalette(brewer.pal(10, "Dark2")) 
PhylaPalette = getPalette(10)

Final.RNA_phylum_plot<- plot_composition(Final.RNA, sample.sort = "Proteobacteria",otu.sort = "abundance", verbose = TRUE)
Final.RNA_phylum_plot <- Final.RNA_phylum_plot + 
  theme_bw() + 
  theme(axis.text.x=element_blank(), axis.ticks.x=element_blank()) +
  scale_fill_manual(values = PhylaPalette)
Final.RNA_phylum_plot

#Bacterial Community Composition for Manuscript
Final.seq.melt.RNA <- psmelt(tax_glom(psdata.r, "Species"))
tax_ranks <- c("Phylum", "Class", "Order", "Family", "Genus", "Species")

for (rank in tax_ranks) {
  n_unique <- length(unique(Final.seq.melt.RNA[[rank]]))
  message(paste(rank, ": ", n_unique, sep = ""))
}
paste("number of unique Phylum is", table(grepl("Kingdom", unique(Final.seq.melt.RNA$Phylum)))[1])
## [1] "number of unique Phylum is 44"
paste("number of unique Class is", table(grepl("Kingdom|Phylum", unique(Final.seq.melt.RNA$Class)))[1])
## [1] "number of unique Class is 106"
paste("number of unique Order is", table(grepl("Kingdom|Class|Phylum", unique(Final.seq.melt.RNA$Order)))[1])
## [1] "number of unique Order is 243"
paste("number of unique Family is", table(grepl("Kingdom|Order|Class|Phylum", unique(Final.seq.melt.RNA$Family)))[1])
## [1] "number of unique Family is 396"
paste("number of unique Genus is", table(grepl("Kingdom|Family|Order|Class|Phylum", unique(Final.seq.melt.RNA$Genus)))[1])
## [1] "number of unique Genus is 839"
paste("number of unique Species is", table(grepl("Kingdom|Family|Order|Class|Phylum|Genus", unique(Final.seq.melt.RNA$Species)))[1])
## [1] "number of unique Species is 419"

Supplementary table for the manuscript

Phylum_df <- summaryBy(Abundance~Phylum, data=Final.seq.melt.RNA, FUN=sum)
Phylum_df$Percent <- round(Phylum_df$Abundance.sum/sum(Phylum_df$Abundance.sum)*100, 4)
Phylum_df <- plyr::arrange(Phylum_df, plyr::desc(Percent))
Phylum_df$PercentageRound <- round(Phylum_df$Percent, digits = 2)
DT::datatable(Phylum_df)
class_df <- summaryBy(Abundance~Class, data=Final.seq.melt.RNA, FUN=sum)
class_df$Percent <- round(class_df$Abundance.sum/sum(class_df$Abundance.sum)*100, 4)
class_df <- plyr::arrange(class_df, plyr::desc(Percent))
class_df$PercentageRound <- round(class_df$Percent, digits = 2)
DT::datatable(class_df)
order_df <- summaryBy(Abundance~Order, data=Final.seq.melt.RNA, FUN=sum)
order_df$Percent <- round(order_df$Abundance.sum/sum(order_df$Abundance.sum)*100, 4)
order_df <- plyr::arrange(order_df, plyr::desc(Percent))
order_df$PercentageRound <- round(order_df$Percent, digits = 2)
DT::datatable(order_df)
family_df <- summaryBy(Abundance~Family, data=Final.seq.melt.RNA, FUN=sum)
family_df$Percent <- round(family_df$Abundance.sum/sum(family_df$Abundance.sum)*100, 4)
family_df <- plyr::arrange(family_df, plyr::desc(Percent))
family_df$PercentageRound <- round(family_df$Percent, digits = 2)
DT::datatable(family_df)
genus_df <- summaryBy(Abundance~Genus, data=Final.seq.melt.RNA, FUN=sum)
genus_df$Percent <- round(genus_df$Abundance.sum/sum(genus_df$Abundance.sum)*100, 4)
genus_df <- plyr::arrange(genus_df, plyr::desc(Percent))
genus_df$Round <- round(genus_df$Percent, digits = 2)
DT::datatable(genus_df)

Supplementary Figure

FigureS4_1 <- transform_sample_counts(psdata.r, function(x) x / sum(x) )
FigureS4_1 <- aggregate_top_taxa(FigureS4_1, level = "Phylum", top = 10)
getPalette = colorRampPalette(brewer.pal(10, "Dark2")) 
PhylaPalette = getPalette(10)
cols  <- c(brewer.pal(8,"Set1"), brewer.pal(7,"Dark2"),brewer.pal(7,"Set2"),brewer.pal(12,"Set3"),brewer.pal(7,"Accent"),brewer.pal(12,"Paired"),"gray")

FigureS4_1_plot<- plot_composition(FigureS4_1, sample.sort = "Proteobacteria",otu.sort = "abundance", verbose = TRUE)
FigureS4_1_plot <- FigureS4_1_plot + 
  theme_bw() + 
  #theme(axis.text.x=element_blank(), axis.ticks.x=element_blank()) +
  scale_fill_manual(values = cols) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+ ylab("Relative abundance")+ labs(fill = "Taxa") +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())


FigureS4_1 <- transform_sample_counts(psdata.r, function(x) x / sum(x) )

FigureS4_2 <- aggregate_top_taxa(FigureS4_1, level = "Class",  top = 10)
getPalette = colorRampPalette(brewer.pal(10, "Dark2")) 
PhylaPalette = getPalette(10)
cols  <- c(brewer.pal(8,"Set1"), brewer.pal(7,"Dark2"),brewer.pal(7,"Set2"),brewer.pal(12,"Set3"),brewer.pal(7,"Accent"),brewer.pal(12,"Paired"),"gray")

FigureS4_2_plot<- plot_composition(FigureS4_2, sample.sort = "Gammaproteobacteria",otu.sort = "abundance", verbose = TRUE)
FigureS4_2_plot <- FigureS4_2_plot + 
  theme_bw() + 
  #theme(axis.text.x=element_blank(), axis.ticks.x=element_blank()) +
  scale_fill_manual(values = cols) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+ ylab("Relative abundance")+ labs(fill = "Taxa") +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())


FigureS4_1 <- transform_sample_counts(psdata.r, function(x) x / sum(x) )

FigureS4_3 <- aggregate_top_taxa(FigureS4_1, level = "Family", top = 10)
getPalette = colorRampPalette(brewer.pal(10, "Dark2")) 
PhylaPalette = getPalette(10)
cols  <- c(brewer.pal(8,"Set1"), brewer.pal(7,"Dark2"),brewer.pal(7,"Set2"),brewer.pal(12,"Set3"),brewer.pal(7,"Accent"),brewer.pal(12,"Paired"),"gray")

FigureS4_3_plot<- plot_composition(FigureS4_3, sample.sort = "Other",otu.sort = "abundance", verbose = TRUE)
FigureS4_3_plot <- FigureS4_3_plot + 
  theme_bw() + 
  #theme(axis.text.x=element_blank(), axis.ticks.x=element_blank()) +
  scale_fill_manual(values = cols) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + ylab("Relative abundance")+ labs(fill = "Taxa") +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())



FigureS4_1 <- transform_sample_counts(psdata.r, function(x) x / sum(x) )

FigureS4_4 <- aggregate_top_taxa(FigureS4_1, level = "Genus", top = 10)
getPalette = colorRampPalette(brewer.pal(10, "Dark2")) 
PhylaPalette = getPalette(10)
cols  <- c(brewer.pal(8,"Set1"), brewer.pal(7,"Dark2"),brewer.pal(7,"Set2"),brewer.pal(12,"Set3"),brewer.pal(7,"Accent"),brewer.pal(12,"Paired"),"gray")

FigureS4_4_plot<- plot_composition(FigureS4_4, sample.sort = "Other",otu.sort = "abundance", verbose = TRUE)
FigureS4_4_plot <- FigureS4_4_plot + 
  theme_bw() + 
  #theme(axis.text.x=element_blank(), axis.ticks.x=element_blank()) +
  scale_fill_manual(values = cols) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+ ylab("Relative abundance")+ labs(fill = "Taxa") +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())


plot_grid(FigureS4_1_plot, FigureS4_3_plot, FigureS4_2_plot, FigureS4_4_plot, labels = 'AUTO', ncol = 2, rel_widths = c(1,1.1,1,1.1))

Alpha and Beta diversity

shannon.div <- estimate_richness(psdata.p, measures = c("Shannon", "Observed"))
sampledata1<- data.frame(sample_data(psdata.p))
row.names(shannon.div) <- gsub("[.]","-", row.names(shannon.div))
sampleData <- merge(sampledata1, shannon.div, by = 0 , all = TRUE)
sampleData <- sampleData %>%
  mutate(New_Diet = case_when(
    samplingTime == "T0" ~ "start",
    TRUE ~ New_Diet
  ))
sampleData$New_Diet <- factor(sampleData$New_Diet, levels=c( 'CTR', 'MC1', 'MC2', 'MN3'))
sampleData$samplingTime <- factor(sampleData$samplingTime, levels=c('T0', 'T1', 'T2', 'T3'))

my_comparisons <- list( c("CTR", "MC1"), c("CTR", "MC2"), c("CTR", "MN3"), 
                        c("MC1", "MC2"), c("MC1", "MN3"), c("MC2", "MN3") )


plot1 <- ggboxplot(sampleData, x = "New_Diet", y = "Shannon",
          color = "black", palette = "jco", legend = "none", outlier.shape = NA)+ 
  geom_pwc(comparisons = my_comparisons, hide.ns = TRUE) +
  #stat_compare_means(label.y = 5) +
  ggplot2::geom_jitter(
    mapping = aes_string(fill="New_Diet"),
    position = ggplot2::position_jitter(seed=123), 
    shape = 21,
    color = "black",
    size = 2,
    alpha = 0.8
  ) +
  theme_bw() +  scale_y_continuous("Shannon index") +
  theme(legend.position="none",axis.title.x=element_blank()) +
  scale_fill_manual(values =  annotation_colors$New_Diet) + 
  scale_colour_manual( values = annotation_colors$New_Diet) +
 facet_grid(. ~ samplingTime, scales="free_x", space = "free_x") 

my_comparisons <- list(c("T0", "T3"), c("T1", "T3"), c("T2", "T3"))

# Assuming your dataframe is named sampleData
sampleData$New_Diet[sampleData$New_Diet == "start"] <- "CTR"



plot2_x <- ggboxplot(sampleData, x = "samplingTime", y = "Shannon",
          color = "black", palette = "jco", legend = "none", outlier.shape = NA)+ 
    stat_compare_means(comparisons = my_comparisons) +
    #stat_compare_means(label.y = 5) +
    ggplot2::geom_jitter(
        mapping = aes_string(fill="samplingTime"),
        position = ggplot2::position_jitter(seed=123), 
        shape = 21,
        color = "black",
        size = 2,
        alpha = 0.8
    ) +
    theme_bw() +  
    theme(legend.position="none",axis.title.x=element_blank()) +
    scale_fill_manual(values =  annotation_colors$samplingTime) + 
    scale_colour_manual( values = annotation_colors$samplingTime) +
    facet_grid(. ~ New_Diet) + 
    labs(y= "Shannon index") + ylim(c(1.5, 5.2))
set.seed(1)
PCoA_bray <- ordinate(physeq = psdata.p, method = "PCoA", distance = "bray")
plot3 <- plot_ordination(
  physeq = psdata.p, 
  ordination = PCoA_bray, 
  color = "samplingTime"
) + 
  geom_point(aes(fill ="samplingTime" ),  size =2) + 
  geom_point(shape = 1, size = 2,colour = "black") +
  theme_bw() + ggtitle("PCoA Plot - Bray") + 
      theme(plot.title = element_text(hjust = 0.5))  + 
 ggtitle("Hindgut: Microbiome") +
  xlab("PCoA 1 [21.1 %]") + ylab("PCoA 2 [7.7 %]") + 
  stat_ellipse() + 
  scale_fill_manual(values =  annotation_colors$samplingTime) + 
  scale_colour_manual( values = annotation_colors$samplingTime) +
  guides(color = guide_legend(title = "Time")) 

# Run adonis test
sampledf <- data.frame(sample_data(psdata.p))
bcdist <- phyloseq::distance(psdata.p, method="bray",normalized=TRUE) 
adonis2(bcdist ~ samplingTime, data = sampledf, permutations = 9999)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = bcdist ~ samplingTime, data = sampledf, permutations = 9999)
##               Df SumOfSqs      R2      F Pr(>F)    
## samplingTime   3    8.711 0.20536 13.094 0.0001 ***
## Residual     152   33.709 0.79464                  
## Total        155   42.420 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
set.seed(1)
plot4 <- plot_ordination(
  physeq = psdata.p, 
  ordination = PCoA_bray, 
  color = "New_Diet"
) + 
  geom_point(aes(fill ="New_Diet" ),  size =2) + 
  geom_point(shape = 1, size = 2,colour = "black") +
  theme_bw() + ggtitle("PCoA Plot - Bray") + 
      theme(plot.title = element_text(hjust = 0.5))  + 
 ggtitle("Hindgut: Microbiome") +
  xlab("PCoA 1 [21.1 %]") + ylab("PCoA 2 [7.7 %]") + 
  stat_ellipse() + 
  scale_fill_manual(values =  annotation_colors$New_Diet) + 
  scale_colour_manual( values = annotation_colors$New_Diet) +
  guides(color = guide_legend(title = "Diet")) 

# Run adonis test
sampledf <- data.frame(sample_data(psdata.p))
adonis2(bcdist ~ New_Diet, data = sampledf, permutations = 9999)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = bcdist ~ New_Diet, data = sampledf, permutations = 9999)
##           Df SumOfSqs      R2     F Pr(>F)
## New_Diet   3    0.914 0.02155 1.116 0.2255
## Residual 152   41.506 0.97845             
## Total    155   42.420 1.00000
# plot5 <- plot_grid(plot1, plot2, labels = c('', ''), label_size = 12, ncol = 1, align = "hv")
# plot6 <- plot_grid(plot3, plot4, labels = c('', ''), label_size = 12, ncol = 1, align = "hv")
# plot_grid(plot5, plot6, ncol = 2, rel_widths = c(1.5, 1))
psdata1 <- subset_samples(psdata, samplingTime != "T0")
table_HM <- rabutable(psdata1, predictor= "New_Diet", type = "Genus", facet_wrap ="samplingTime", N_taxa = 20)


data = table_HM %>%   
  mutate(ID = paste0(variable, ":", wrap, "_", predictor2)) %>%  
  group_by(ID) %>%   
  summarize(meanvalue = mean(log(value))) %>%   
  mutate(feedtime = sub('.*:', '', ID)) %>%  
  mutate(ID = sub(':.*', '', ID)) %>%   
  pivot_wider(names_from = feedtime, values_from = meanvalue)    

rabuplot_df_genus <- as.data.frame(data)
rownames(rabuplot_df_genus) <- data$ID 
rabuplot_df_genus <- rabuplot_df_genus[,-1] 


heatmap_colors <- colorRampPalette(c("#18b29f","#FFFFFF","#ac6721"), interpolate = "spline", space = "rgb")(51)

annotation_col <- 
  colnames(rabuplot_df_genus) %>% 
  as.data.frame() %>% 
  dplyr::rename(common = ".") %>% 
  tidyr::separate(col = common,  into = c("Time", "Diet"), remove = F, extra = "drop") %>% # If replicates are present, we drop them here
  dplyr::select(common,Time, Diet) %>%  
  tibble::column_to_rownames(var = "common")

annotation_colors_1 <- list(
  Time = c(`T1` = "#FF7F00", `T2` = "#FFD92F", `T3`="#F781BF"),
  Diet = c(`CTR`="#e41a1cff", `MC1`="#377EB8", `MC2`="#4DAF4A", `MN3`="#984EA3"))


row.names(rabuplot_df_genus)[3] <- "BCP group"   
plot7 <- pheatmap(t(rabuplot_df_genus), 
         annotation_row = annotation_col,
         annotation_colors = annotation_colors_1,  
         color = heatmap_colors,
         scale = "row", 
         cluster_rows = F, 
         cluster_cols = T, 
         gaps_row = c(4,8),
         show_colnames = T,
        show_rownames = F)

5 Transcriptomics

Import data

# Replace this base URL with the raw content URL of your GitHub repository
base_url <- "https://raw.githubusercontent.com/shashank-KU/ImprovaFish-MDF-Effects/main/"

# Specify the paths to your files on GitHub
host_gut_rawCounts_url <- paste0(base_url, "Transcriptomics/low_dosage_trial/host_gut_count.txt")
sample_info_url <- paste0(base_url, "Transcriptomics/low_dosage_trial/sample_info.csv")

# Read the files
host_gut_rawCounts_total1 <- read.table(url(host_gut_rawCounts_url))
sample_info <- read.csv(url(sample_info_url), row.names = 1)

omics_data_host <- host_gut_rawCounts_total1
host_gut_mapping_file <- sample_info

Initial formatting Main idea of this chunk of code is make sure we have same number of samples in countData and samples in colData for DESeq2. Also the order of the samples in colData need to match the order of the samples in countData

sample_info$ID_New <- paste(sample_info$Time, "_",
                            sample_info$New_Diet, "_", 
                            sample_info$Tank_number, "_", "0",
                            sample_info$Sample_Number, 
                                      sep = "")
head_change <- subset(sample_info, select=c("common" ,"ID_New"))
row.names(sample_info) <- sample_info$ID_New

omics_data_host1 <- omics_data_host[, colnames(omics_data_host) %in% sample_info$ID_New]
omics_data_host <- omics_data_host1
# rename colnames of omics_data_host using new ID
df1 <- subset(sample_info, select= c("common", "ID_New"))
omics_data_host %>% 
  rename_with(~deframe(df1)[.x], .cols = df1$ID_New) %>% 
  select(any_of(df1$ID_New)) -> omics_data_host_new
omics_data_host <- omics_data_host_new
reorder_idx <- match(sample_info$ID_New, colnames(omics_data_host))
omics_data_host <- omics_data_host[ , reorder_idx]
all(colnames(omics_data_host) == sample_info$ID_New)
## [1] TRUE

DESeq2 analysis

df <- round(omics_data_host) %>%
  # The next steps require a data frame and round() returns a matrix
  as.data.frame() %>%
  # Only keep rows that have total counts above the cutoff
  dplyr::filter(rowSums(.) >= 0)

#all(colnames(df) == rownames(sample_info))

dds <- DESeqDataSetFromMatrix(
  countData = df, # Our prepped data frame with counts
  colData = sample_info, # Data frame with annotation for our samples
  design = ~1 # Here we are not specifying a model
)
dds$group <- factor(paste0(dds$Time, dds$New_Diet))
design(dds) <- ~ group-1

dds_trial_1 <- DESeq(dds, parallel = T)
resultsNames(dds_trial_1 )

annot_function <- read_tsv("/Users/shashankgupta/Desktop/ImprovAFish/Salmo_salar-GCA_905237065.2_gene_annotations.tsv")
annot_function <- annot_function[, c("gene_id", "v2.gene_id.NCBI","v2.gene_name.ensembl", "v2.product")]
vst <- vst(dds_trial_1)
pcaPlot <- DESeq2::plotPCA(vst, intgroup = c("Time", "New_Diet", "group"), ntop = nrow(dds_trial_1), returnData = TRUE) 

percentVar <- round(100 * attr(pcaPlot, "percentVar"))

plot8<- ggplot(pcaPlot, aes(PC1, PC2, color=New_Diet)) + 
  geom_point(aes(fill ="New_Diet" ),  size =2.5) + 
  geom_point(shape = 1, size = 2.5,colour = "black") +
  theme_bw() + ggtitle("Hindgut: Host transcriptomics") + 
  theme(plot.title = element_text(hjust = 0.5)) +
  xlab("PC 1 [12 %]") + ylab("PC 2 [6 %]") + 
  stat_ellipse() + scale_fill_manual(values =annotation_colors$New_Diet) + 
  scale_colour_manual( values = annotation_colors$New_Diet) +
  guides(color = guide_legend(title = "Diet")) 


plot8.1<- ggplot(pcaPlot, aes(PC1, PC2, color=Time)) + 
  geom_point(aes(fill ="Time" ),  size =2.5) + 
  geom_point(shape = 1, size = 2.5,colour = "black") +
  theme_bw() + ggtitle("Hindgut: Host transcriptomics") + 
  theme(plot.title = element_text(hjust = 0.5)) +
  xlab("PC 1 [12 %]") + ylab("PC 2 [6 %]") + 
  stat_ellipse() + scale_fill_manual(values =annotation_colors$samplingTime) + 
  scale_colour_manual( values = annotation_colors$samplingTime) +
  guides(color = guide_legend(title = "Life stage")) 


# Perform PERMANOVA using adonis2
dist_matrix <- dist(pcaPlot[, c("PC1", "PC2")])
permanova_result <- adonis2(dist_matrix ~ Time, data = pcaPlot, permutations = 9999)
permanova_result
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = dist_matrix ~ Time, data = pcaPlot, permutations = 9999)
##           Df SumOfSqs      R2      F Pr(>F)    
## Time       2   108426 0.36678 40.256 0.0001 ***
## Residual 139   187194 0.63322                  
## Total    141   295620 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

6 Metatranscriptomics

Import data and intial preprocessing

# Specify the path to your RDS file on GitHub
rds_file <- "Metatranscriptomics/low_dosage_trial/dds_metatranscriptomics.rds"
rds_url <- paste0("https://raw.githubusercontent.com/shashank-KU/ImprovaFish-MDF-Effects/main/", rds_file)

# Read the RDS file
dds_metatranscriptomics <- readRDS(url(rds_url))


# Filter out T0 time point
dds_metatranscriptomics <- dds_metatranscriptomics[ , dds_metatranscriptomics$TimePoint != "T0"]

# Prepare data for analysis
dds_metatranscriptomics$New_Diet <- toupper(dds_metatranscriptomics$Treatment)
dds_metatranscriptomics$Time <- toupper(dds_metatranscriptomics$TimePoint)
dds_metatranscriptomics$group <- factor(paste0(dds_metatranscriptomics$Time, dds_metatranscriptomics$New_Diet))
design(dds_metatranscriptomics) <- ~ group-1

# Calculate gene sums and filter out low-expressed genes
gene_sums <- rowSums(counts(dds_metatranscriptomics, normalized = FALSE))
dds_filtered <- dds_metatranscriptomics[gene_sums >= 50,]

# Perform DESeq analysis
dds_metatranscriptomics <- DESeq(dds_filtered, parallel = TRUE)

# Explore results
resultsNames(dds_metatranscriptomics)
##  [1] "groupT1CTR" "groupT1MC1" "groupT1MC2" "groupT1MN3" "groupT2CTR"
##  [6] "groupT2MC1" "groupT2MC2" "groupT2MN3" "groupT3CTR" "groupT3MC1"
## [11] "groupT3MC2" "groupT3MN3"
res <- results(dds_metatranscriptomics)

# Count and print the number of differentially expressed genes with adjusted p-value < 0.05
cat("Number of differentially expressed microbial genes (padj < 0.05):", sum(res$padj < 0.05), "\n")
## Number of differentially expressed microbial genes (padj < 0.05): NA
# Display summary of DESeq results
summary(res)
## 
## out of 82484 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 3575, 4.3%
## LFC < 0 (down)     : 178, 0.22%
## outliers [1]       : 0, 0%
## low counts [2]     : 44779, 54%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# Order results by adjusted p-value
res <- res[order(res$padj),]

# Filter and arrange results for display
res_tbl <- as_tibble(res, rownames = "ENSEMBL") %>%
  filter(padj < 0.05) %>%
  arrange(padj)

# Perform variance stabilizing transformation (VST)
vst_data <- vst(dds_metatranscriptomics)

# Generate PCA plot
pcaPlot <- DESeq2::plotPCA(vst_data, intgroup = c("Time", "New_Diet", "group"), ntop = nrow(dds_trial_1), returnData = TRUE) 

# Extract percentage of variance explained by PCs
percentVar <- round(100 * attr(pcaPlot, "percentVar"))

# Create PCA plot using ggplot2
plot9 <- ggplot(pcaPlot, aes(PC1, PC2, color=New_Diet)) + 
  geom_point(aes(fill ="New_Diet" ),  size =2.5) + 
  geom_point(shape = 1, size = 2.5,colour = "black") +
  theme_bw() + ggtitle("Hindgut: Metatranscriptomics") + 
  theme(plot.title = element_text(hjust = 0.5)) +
  xlab(paste("PC 1 [", percentVar[1], "%]")) + ylab(paste("PC 2 [", percentVar[2], "%]")) + 
  stat_ellipse() + scale_fill_manual(values = annotation_colors$New_Diet) + 
  scale_colour_manual( values = annotation_colors$New_Diet) +
  guides(color = guide_legend(title = "Diet")) 
contrasts <- c("T1MC1", "T1MC2", "T1MN3")

for (contrast in contrasts) {
  contrast_group <- paste0("group", contrast)
  contrast_levels <- list(contrast_group, "groupT1CTR")
  
  res <- results(dds_metatranscriptomics, contrast = contrast_levels)
  
  # Extract the number of differentially expressed genes with adjusted p-value < 0.05
  num_de_genes <- sum(res$padj < 0.05, na.rm = TRUE)
  
  # Print informative message
  cat("Number of differentially expressed microbial genes at time point 1 (T1)", "for CTR vs", contrast_group, "is", num_de_genes, "\n")
}
## Number of differentially expressed microbial genes at time point 1 (T1) for CTR vs groupT1MC1 is 25 
## Number of differentially expressed microbial genes at time point 1 (T1) for CTR vs groupT1MC2 is 24 
## Number of differentially expressed microbial genes at time point 1 (T1) for CTR vs groupT1MN3 is 12
contrasts <- c("T2MC1", "T2MC2", "T2MN3")

for (contrast in contrasts) {
  contrast_group <- paste0("group", contrast)
  contrast_levels <- list(contrast_group, "groupT2CTR")
  
  res <- results(dds_metatranscriptomics, contrast = contrast_levels)
  
  # Extract the number of differentially expressed genes with adjusted p-value < 0.05
  num_de_genes <- sum(res$padj < 0.05, na.rm = TRUE)
  
  # Print informative message
  cat("Number of differentially expressed microbial genes at time point 2 (T2)", contrast, "for CTR vs", contrast_group, "is", num_de_genes, "\n")
}
## Number of differentially expressed microbial genes at time point 2 (T2) T2MC1 for CTR vs groupT2MC1 is 7 
## Number of differentially expressed microbial genes at time point 2 (T2) T2MC2 for CTR vs groupT2MC2 is 35 
## Number of differentially expressed microbial genes at time point 2 (T2) T2MN3 for CTR vs groupT2MN3 is 33
contrasts <- c("T3MC1", "T3MC2", "T3MN3")

for (contrast in contrasts) {
  contrast_group <- paste0("group", contrast)
  contrast_levels <- list(contrast_group, "groupT3CTR")
  
  res <- results(dds_metatranscriptomics, contrast = contrast_levels)
  
  # Extract the number of differentially expressed genes with adjusted p-value < 0.05
  num_de_genes <- sum(res$padj < 0.05, na.rm = TRUE)
  
  # Print informative message
  cat("Number of differentially expressed microbial genes at time point 3 (T3)", contrast, "for CTR vs", contrast_group, "is", num_de_genes, "\n")
}
## Number of differentially expressed microbial genes at time point 3 (T3) T3MC1 for CTR vs groupT3MC1 is 25 
## Number of differentially expressed microbial genes at time point 3 (T3) T3MC2 for CTR vs groupT3MC2 is 21 
## Number of differentially expressed microbial genes at time point 3 (T3) T3MN3 for CTR vs groupT3MN3 is 24
plot_left <- plot_grid(plot2, plot1, plot7$gtable, align = "hv", ncol = 1, rel_heights =  c(.6,.6,1.5), labels = c('B', 'C', 'G'))

plot_right <- plot_grid(plot3, plot4, plot8, plot9, align = "hv", ncol = 1, rel_widths = c(1,1,1,1,1), labels = c('D', 'E', 'F', 'H' ))

plot_grid(plot_left, plot_right, align = "hv", rel_widths = c(2,1))