1 Overview of the multi-omics
2 Sampling for low inclusion of beta-mannnan trial
3 Phenotypic scoring
4 Metagenomics
5 Transcriptomics
6 Metatranscriptomics

Load all the packages

library("ranacapa")
library("phyloseq")
library("ggplot2")
library("stringr")
library("plyr")
library("reshape2")
library("reshape")
library("dplyr")
library("tidyr")
library("doBy")
library("plyr")
library("microbiome")
library("ggpubr")
library("vegan")
library("tidyverse")
library("magrittr")
library("cowplot")
library("dendextend")
library("WGCNA")
library("metagenomeSeq")
library("decontam")
library("RColorBrewer")
library("ampvis2")
library("ggpubr")
library("formatR")
library("DT")
library("taxcleanr") #devtools::install_github("shashank-KU/taxcleanr")
library("pheatmap")
library("DESeq2")
library("microbiomeMarker")

1 Overview of the multi-omics

2 Sampling for low inclusion of beta-mannnan trial

3 Phenotypic scoring

# Specify the path to your CSV file on GitHub
csv_file <- "Phenotypic/Metrics_high_dosage.csv"
csv_url <- paste0("https://raw.githubusercontent.com/shashank-KU/ImprovaFish-MDF-Effects/main/", csv_file)

# Read the CSV file
fish_data <- read.csv(url(csv_url), row.names = 1)

fish_data$Diet <- factor(fish_data$Diet, levels=c('CTR', '4%MN3'))
annotation_colors <- list(
  Diet = c(`CTR`="#e41a1cff", `4%MN3`="#984EA3"))

plot_2A <- ggboxplot(fish_data, x = "Diet", y = "Body.weight",
          color = "black", palette = "jco", legend = "none", outlier.shape = NA)+ 
  #stat_compare_means(comparisons = my_comparisons, hide.ns = F) +
  stat_compare_means(label.y = 370) +
  ggplot2::geom_jitter(
    mapping = aes_string(fill="Diet"),
    position = ggplot2::position_jitter(seed=123), 
    shape = 21,
    color = "black",
    size = 2,
    alpha = 0.8
  ) +
  theme_bw() +   ylab("Body weight") +
  theme(legend.position="none",axis.title.x=element_blank()) +
  scale_fill_manual(values =  annotation_colors$Diet) + 
  scale_colour_manual( values = annotation_colors$Diet)

# fish_data %>%
#     complete(Timepoint, Diet, fill = list(BW = NA, Length = NA)) %>%
#     group_by(Timepoint, Diet) %>%
#     summarise(across(c(Heart_Score, Fat_Score,cataract_V,cataract_H  ), mean, na.rm = TRUE), .groups = 'drop') %>%
#     mutate(across(where(is.numeric), round, 2)) %>%
#     DT::datatable()

library(dplyr)
library(tidyr)

fish_data %>%
  complete(Diet, fill = list(Body.weight = NA, Length = NA, GW = NA, CF = NA, HSI = NA, CSI = NA, Heart_W= NA,Liver_W = NA)) %>%
  group_by(Diet) %>%
  summarise(
    Mean_BW = mean(Body.weight, na.rm = TRUE),
    Mean_Length = mean(Length, na.rm = TRUE),
    Mean_GW = mean(GW, na.rm = TRUE),
    Mean_CF = mean(CF, na.rm = TRUE),
    Mean_HSI = mean(HSI, na.rm = TRUE),
    Mean_CSI = mean(CSI, na.rm = TRUE),
    Mean_Heart_W = mean(Heart_W, na.rm = TRUE),
    Mean_Liver_W = mean(Liver_W, na.rm = TRUE),
    SEM_BW = sd(Body.weight, na.rm = TRUE) / sqrt(sum(!is.na(Body.weight))),
    SEM_Length = sd(Length, na.rm = TRUE) / sqrt(sum(!is.na(Length))),
    SEM_GW = sd(GW, na.rm = TRUE) / sqrt(sum(!is.na(GW))),
    SEM_CF = sd(CF, na.rm = TRUE) / sqrt(sum(!is.na(CF))),
    SEM_HSI = sd(HSI, na.rm = TRUE) / sqrt(sum(!is.na(HSI))),
    SEM_CSI = sd(CSI, na.rm = TRUE) / sqrt(sum(!is.na(CSI))),
    SEM_Heart_W = sd(Heart_W, na.rm = TRUE) / sqrt(sum(!is.na(Heart_W))),
    SEM_Liver_W = sd(Liver_W, na.rm = TRUE) / sqrt(sum(!is.na(Liver_W))),

    .groups = 'drop'
  ) %>%
  mutate(
    across(where(is.numeric), ~round(., 2)),
    across(starts_with("Mean_"), ~paste(., "±", get(paste0("SEM_", sub("Mean_", "", cur_column())))))
  ) %>%
  filter(complete.cases(.)) %>%
  select(Diet, starts_with("Mean_"), -starts_with("SEM_")) %>%
  rename_with(~sub("Mean_", "", .), starts_with("Mean_")) %>%
  DT::datatable()

4 Metagenomics

Import data and clean the taxonomy

# Load data
base_url <- "https://raw.githubusercontent.com/shashank-KU/ImprovaFish-MDF-Effects/main/"
# Specify the path to your RDS file on GitHub
rds_file <- "Metagenomics/high_dosage_trial/metagenomics_high_MDF.rdata"
rds_url <- paste0(base_url, rds_file)
# Read the RDS file
all <- readRDS(url(rds_url))

tax <- data.frame(tax_table(all), stringsAsFactors = FALSE)
tax <- tax[,1:7] # No info in col 8-15
# Set informative colnames
colnames(tax) <- c("Kingdom", "Phylum","Class","Order","Family","Genus", "Species")
library(stringr)
tax.clean <- data.frame(row.names = row.names(tax),
                        Kingdom = str_replace(tax[,1], "d__",""), 
                        Phylum = str_replace(tax[,2], "p__",""),
                        Class = str_replace(tax[,3], "c__",""),
                        Order = str_replace(tax[,4], "o__",""),
                        Family = str_replace(tax[,5], "f__",""),
                        Genus = str_replace(tax[,6], "g__",""),
                        Species = str_replace(tax[,7], "s__",""), 
                        stringsAsFactors = FALSE)
tax.clean[is.na(tax.clean)] <- ""
tax.clean[tax.clean$Phylum %in% c("uncultured"),2:8] <- ""
out <- c("uncultured")
tax.clean[tax.clean$Class %in% out, 3:8] <- ""
tax.clean[tax.clean$Order %in% out, 4:8] <- ""
tax.clean[tax.clean$Family %in% out, 5:8] <- ""
tax.clean[tax.clean$Genus %in% out, 6:8] <- ""
tax.clean[tax.clean$Species %in% out, 7:8] <- ""

# Remove remove ".", change "-" and " " to "_"
for (i in 1:ncol(tax.clean)){
  tax.clean[,i] <- str_replace_all(tax.clean[,i], "[.]","")
  tax.clean[,i] <- str_replace_all(tax.clean[,i], "[(]","")
  tax.clean[,i] <- str_replace_all(tax.clean[,i], "[)]","")
  tax.clean[,i] <- str_replace_all(tax.clean[,i], "-","_")
  tax.clean[,i] <- str_replace_all(tax.clean[,i], " ","_")
}

for (i in 1:7){ tax.clean[,i] <- as.character(tax.clean[,i])}
# File holes in the tax table
for (i in 1:nrow(tax.clean)){
  #  Fill in missing taxonomy
  if (tax.clean[i,2] == ""){
    kingdom <- paste("Kingdom_", tax.clean[i,1], sep = "")
    tax.clean[i, 2:7] <- kingdom
  } else if (tax.clean[i,3] == ""){
    phylum <- paste("Phylum_", tax.clean[i,2], sep = "")
    tax.clean[i, 3:7] <- phylum
  } else if (tax.clean[i,4] == ""){
    class <- paste("Class_", tax.clean[i,3], sep = "")
    tax.clean[i, 4:7] <- class
  } else if (tax.clean[i,5] == ""){
    order <- paste("Order_", tax.clean[i,4], sep = "")
    tax.clean[i, 5:7] <- order
  } else if (tax.clean[i,6] == ""){
    family <- paste("Family_", tax.clean[i,5], sep = "")
    tax.clean[i, 6:7] <- family
  } else if (tax.clean[i,7] == ""){
    tax.clean$Species[i] <- paste("Genus_",tax.clean$Genus[i], sep = "_")
  }
}
tax_table(all) <- as.matrix(tax.clean)
all.clean <- subset_taxa(all,
  Kingdom != "Unassigned" &
  Kingdom != "Archaea" &
  Kingdom != "Eukaryota" &
  Family != "Mitochondria" &
  Order != "Chloroplast" &
  Genus != "Ralstonia"
) %>%
prune_taxa(taxa_sums(.) > 0, .) %>%
subset_samples(diet != "test") %>%
prune_taxa(taxa_sums(.) > 0, .) %>%
subset_samples(New_Diet != "1%_betamannan") %>%
prune_taxa(taxa_sums(.) > 0, .) %>%
subset_samples(sampleType != "feed") %>%
prune_taxa(taxa_sums(.) > 0, .)

sample_data(all.clean)$New_Diet <- ifelse(sample_data(all.clean)$New_Diet == "Control", "CTR", ifelse(sample_data(all.clean)$New_Diet == "4%_betamannan", "4%MN3", sample_data(all.clean)$New_Diet))
sample_data(all.clean)$New_Diet <- factor(sample_data(all.clean)$New_Diet, levels=c('CTR', '4%MN3'))

all.clean

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 3505 taxa and 69 samples ]
## sample_data() Sample Data:       [ 69 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 3505 taxa by 8 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 3505 tips and 3497 internal nodes ]
## refseq()      DNAStringSet:      [ 3505 reference sequences ]

tax_table(all.clean) <- tax_table(all.clean)[, c(1:7)]
sample_data(all.clean)$Group <- paste(sample_data(all.clean)$sampleType,sample_data(all.clean)$New_Diet,sep = "_")
psdata.p <- prune_taxa(taxa_sums(all.clean) >0, all.clean)
psdata.p

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 3505 taxa and 69 samples ]
## sample_data() Sample Data:       [ 69 samples by 13 sample variables ]
## tax_table()   Taxonomy Table:    [ 3505 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 3505 tips and 3497 internal nodes ]
## refseq()      DNAStringSet:      [ 3505 reference sequences ]

Rarefaction plot- This R code is using the “ggrare” function to create a rarefaction plot from the data in the “all.clean” object.

annotation_colors <- list(
  #samplingTime = c(`T0`="gray" ,`T1` = "#FF7F00", `T2` = "#FFD92F", `T3`="#F781BF"),
  New_Diet = c(`CTR`="#e41a1cff", `4%MN3`="#984EA3"))


p <- ggrare(all.clean, step = 1000, 
            color = "New_Diet", 
            se = F,
            parallel = TRUE,
            plot = F) 
p + theme_bw() + 
  scale_fill_manual(values =annotation_colors$New_Diet) +
  scale_colour_manual( values = annotation_colors$New_Diet)

Taxonomic classification- Phylum level taxonomic distribution. Bars report the mean abundance for each individual sample.

HG <- subset_samples(all.clean, sampleType == "Hindgut")
HG <- prune_taxa(taxa_sums(HG) > 0, HG)

PC <- subset_samples(all.clean, sampleType == "Pyloric_caeca")
PC <- prune_taxa(taxa_sums(PC) > 0, PC)


HG_T<- transform_sample_counts(HG, function(x) x / sum(x) )
HG.Final.RNA <- aggregate_rare(HG_T, level = "Phylum", detection = 1/100, prevalence = 10/100)
getPalette = colorRampPalette(brewer.pal(10, "Dark2")) 
PhylaPalette = getPalette(10)
cols  <- c(brewer.pal(8,"Set1"), brewer.pal(7,"Dark2"),brewer.pal(7,"Set2"),brewer.pal(12,"Set3"),brewer.pal(7,"Accent"),brewer.pal(12,"Paired"),"gray")

HG.Final.RNA_phylum_plot<- plot_composition(HG.Final.RNA, sample.sort = "Proteobacteria",otu.sort = "abundance", verbose = TRUE, group_by = "New_Diet")
HG.Final.RNA_phylum_plot <- HG.Final.RNA_phylum_plot + 
  theme_bw() + 
  #theme(axis.text.x=element_blank(), axis.ticks.x=element_blank()) +
  scale_fill_manual(values = cols) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

PC_T<- transform_sample_counts(PC, function(x) x / sum(x) )


PC.Final.RNA <- aggregate_rare(PC_T, level = "Phylum", detection = 1/100, prevalence = 10/100)
getPalette = colorRampPalette(brewer.pal(16, "Dark2")) 
PhylaPalette = getPalette(16)

cols  <- c(brewer.pal(8,"Set1"), brewer.pal(7,"Dark2"),brewer.pal(7,"Set2"),brewer.pal(12,"Set3"),brewer.pal(7,"Accent"),brewer.pal(12,"Paired"),"gray")


PC.Final.RNA_phylum_plot<- plot_composition(PC.Final.RNA, sample.sort = "Proteobacteria",otu.sort = "abundance", verbose = TRUE, group_by = "New_Diet")
PC.Final.RNA_phylum_plot <- PC.Final.RNA_phylum_plot + 
  theme_bw() + 
  #theme(axis.text.x=element_blank(), axis.ticks.x=element_blank()) +
  scale_fill_manual(values = cols) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))


plot_grid(HG.Final.RNA_phylum_plot, PC.Final.RNA_phylum_plot, ncol = 1, align = "hv")

#Bacterial Community Composition for Manuscript
Final.seq.melt.RNA <- psmelt(tax_glom(all.clean, "Species"))
tax_ranks <- c("Phylum", "Class", "Order", "Family", "Genus", "Species")

for (rank in tax_ranks) {
  n_unique <- length(unique(Final.seq.melt.RNA[[rank]]))
  message(paste(rank, ": ", n_unique, sep = ""))
}

Alpha diversity- Alpha diversity is a measure of the diversity of species within a given area or sample. It can be measured in two different ways: Shannon diversity, which takes into account both the richness and evenness of species in a given sample, or observed richness, which simply counts the total number of species present. Shannon diversity is often used to compare the diversity of different samples, whereas observed richness can be used to compare the diversity of different areas.

# Estimate richness of all.clean
shannon.div <- estimate_richness(all.clean, measures = c("Shannon", "Observed"))

# Get sample data
sampledata1<- data.frame(sample_data(all.clean))

# Rename row names
row.names(shannon.div) <- gsub("X","", row.names(shannon.div))
row.names(shannon.div) <- gsub("[.]","-", row.names(shannon.div))

# Merge data
sampleData <- merge(sampledata1, shannon.div, by = 0 , all = TRUE)

# Create Shannon Plot
plot11 <- ggboxplot(sampleData, x = "New_Diet", y = "Shannon",
          color = "black", palette = "jco", legend = "none", outlier.shape = NA)+ 
 # stat_compare_means(comparisons = my_comparisons)+ # Add pairwise comparisons p-value
  stat_compare_means(label.y = 4.3) +
  ggplot2::geom_jitter(
    mapping = aes_string(fill="New_Diet"),
    position = ggplot2::position_jitter(seed=123), 
    shape = 21,
    color = "black",
    size = 2,
    alpha = 0.8
  ) +
  theme_bw() +  labs(y= "Shannon index") +
  theme(legend.position="none",axis.title.x=element_blank()) +
  scale_fill_manual(values = c( "#E41A1C", "#984EA3")) + 
  scale_colour_manual( values = c( "#E41A1C", "#984EA3")) +
  facet_wrap(. ~ sampleType, ncol = 2) +       
  theme(panel.spacing = unit(1, "cm"))

Beta diversity- Beta diversity is a term used to refer to the differences in species composition between two different sites or habitats. It is often used to measure how different species are distributed across a landscape. The concept of beta diversity was first proposed by ecologist Robert H. Whittaker in 1960. The measure is used to quantify the variation in species composition between two different sites, such as an island and the mainland. The most commonly used measure for beta diversity is the Bray-Curtis index, which looks at the ratio of shared species between two sites. This index is used to measure the differences in species composition between areas and to identify the importance of certain areas in terms of species diversity.

# Calculate PCoA on Bray distance
PCoA_bray_HG <- ordinate(physeq = HG, method = "PCoA", distance = "bray")

# Plot PCoA on Bray distance
PCoA_bray_HG_plot <- plot_ordination(
  physeq = HG, 
  ordination = PCoA_bray_HG, 
  color = "New_Diet"
) + 
  geom_point(shape = 19, alpha=0.7) + 
  theme_bw() + ggtitle("Hindgut: Microbiome") +
    theme(plot.title = element_text(hjust = 0.5))  + 
  xlab("PCoA 1 [47 %]") + ylab("PCoA 2 [9.3 %]") + 
  stat_ellipse() + scale_fill_manual(values = annotation_colors$New_Diet) + 
  scale_colour_manual( values = annotation_colors$New_Diet)+
  guides(color = guide_legend(title = "Diet")) 

# Calculate PCoA on Bray distance
PCoA_bray_PC <- ordinate(physeq = PC, method = "PCoA", distance = "bray")

# Plot PCoA on Bray distance
PCoA_bray_PC_plot <- plot_ordination(
  physeq = PC, 
  ordination = PCoA_bray_PC, 
  color = "New_Diet"
) + 
  geom_point(shape = 19, alpha=0.7) + 
  theme_bw() + ggtitle("Pyloric caeca: Microbiome") +
    theme(plot.title = element_text(hjust = 0.5)) + 
  xlab("PCoA 1 [29.2 %]") + ylab("PCoA 2 [15.4 %]") + 
  stat_ellipse() + scale_fill_manual(values = annotation_colors$New_Diet) + 
  scale_colour_manual( values = annotation_colors$New_Diet) +
  guides(color = guide_legend(title = "Diet")) 


# Grid plot of PCoA plots
up_row <- plot_grid(plot_2A, plot11, labels = c('B', 'C'), align = 'h', nrow = 1, rel_widths = c(0.5,1))
bottom_row <- plot_grid(PCoA_bray_HG_plot, PCoA_bray_PC_plot, labels = c('E', 'F'), align = 'h', ncol = 1)

#Bacterial Community Composition for Manuscript
Final.seq.melt.RNA <- psmelt(tax_glom(HG, "Species"))
tax_ranks <- c("Phylum", "Class", "Order", "Family", "Genus", "Species")

for (rank in tax_ranks) {
  n_unique <- length(unique(Final.seq.melt.RNA[[rank]]))
  message(paste(rank, ": ", n_unique, sep = ""))
}

#Bacterial Community Composition for Manuscript
Final.seq.melt.RNA.PC <- psmelt(tax_glom(PC, "Species"))
tax_ranks <- c("Phylum", "Class", "Order", "Family", "Genus", "Species")

for (rank in tax_ranks) {
  n_unique <- length(unique(Final.seq.melt.RNA.PC[[rank]]))
  message(paste(rank, ": ", n_unique, sep = ""))
}

library(doBy)
Final.seq.melt.RNA.PC <- psmelt(tax_glom(PC, "Species"))
table(grepl("Kingdom", unique(Final.seq.melt.RNA.PC$Phylum)))

## 
## FALSE  TRUE 
##    32     1

table(grepl("Phylum", unique(Final.seq.melt.RNA.PC$Class)))

## 
## FALSE  TRUE 
##    67     2

table(grepl("Class|Phylum", unique(Final.seq.melt.RNA.PC$Order)))

## 
## FALSE  TRUE 
##   147    10

table(grepl("Order|Class|Phylum", unique(Final.seq.melt.RNA.PC$Family)))

## 
## FALSE  TRUE 
##   232    29

table(grepl("Family|Order|Class|Phylum", unique(Final.seq.melt.RNA.PC$Genus)))

## 
## FALSE  TRUE 
##   414    96

table(grepl("Family|Order|Class|Phylum|Genus", unique(Final.seq.melt.RNA.PC$Species)))

## 
## FALSE  TRUE 
##     1   509

Phylum_df <- summaryBy(Abundance~Phylum, data=Final.seq.melt.RNA.PC, FUN=sum)
Phylum_df$Percent <- round(Phylum_df$Abundance.sum/sum(Phylum_df$Abundance.sum)*100, 4)
Phylum_df <- plyr::arrange(Phylum_df, plyr::desc(Percent))
Phylum_df$PercentageRound <- round(Phylum_df$Percent, digits = 2)
DT::datatable(Phylum_df)

class_df <- summaryBy(Abundance~Phylum+Class, data=Final.seq.melt.RNA.PC, FUN=sum)
class_df$Percent <- round(class_df$Abundance.sum/sum(class_df$Abundance.sum)*100, 4)
class_df <- plyr::arrange(class_df, plyr::desc(Percent))
class_df$Round <- round(class_df$Percent, digits = 2)
DT::datatable(class_df)

genus_df <- summaryBy(Abundance~Genus, data=Final.seq.melt.RNA.PC, FUN=sum)
genus_df$Percent <- round(genus_df$Abundance.sum/sum(genus_df$Abundance.sum)*100, 4)
genus_df <- plyr::arrange(genus_df, plyr::desc(Percent))
genus_df$Round <- round(genus_df$Percent, digits = 2)

DT::datatable(genus_df)

library(doBy)
Final.seq.melt.RNA.HG <- psmelt(tax_glom(HG, "Species"))
table(grepl("Kingdom", unique(Final.seq.melt.RNA.HG$Phylum)))

## 
## FALSE  TRUE 
##    35     1

table(grepl("Phylum", unique(Final.seq.melt.RNA.HG$Class)))

## 
## FALSE  TRUE 
##    92     2

table(grepl("Class|Phylum", unique(Final.seq.melt.RNA.HG$Order)))

## 
## FALSE  TRUE 
##   194    12

table(grepl("Order|Class|Phylum", unique(Final.seq.melt.RNA.HG$Family)))

## 
## FALSE  TRUE 
##   313    32

table(grepl("Family|Order|Class|Phylum", unique(Final.seq.melt.RNA.HG$Genus)))

## 
## FALSE  TRUE 
##   567   116

table(grepl("Family|Order|Class|Phylum|Genus", unique(Final.seq.melt.RNA.HG$Species)))

## 
## FALSE  TRUE 
##     1   682

Phylum_df <- summaryBy(Abundance~Phylum, data=Final.seq.melt.RNA.HG, FUN=sum)
Phylum_df$Percent <- round(Phylum_df$Abundance.sum/sum(Phylum_df$Abundance.sum)*100, 4)
Phylum_df <- plyr::arrange(Phylum_df, plyr::desc(Percent))
Phylum_df$PercentageRound <- round(Phylum_df$Percent, digits = 2)
DT::datatable(Phylum_df)

class_df <- summaryBy(Abundance~Phylum+Class, data=Final.seq.melt.RNA.HG, FUN=sum)
class_df$Percent <- round(class_df$Abundance.sum/sum(class_df$Abundance.sum)*100, 4)
class_df <- plyr::arrange(class_df, plyr::desc(Percent))
class_df$Round <- round(class_df$Percent, digits = 2)
DT::datatable(class_df)

genus_df <- summaryBy(Abundance~Genus, data=Final.seq.melt.RNA.HG, FUN=sum)
genus_df$Percent <- round(genus_df$Abundance.sum/sum(genus_df$Abundance.sum)*100, 4)
genus_df <- plyr::arrange(genus_df, plyr::desc(Percent))
genus_df$Round <- round(genus_df$Percent, digits = 2)

DT::datatable(genus_df)

sampledf_HG <- data.frame(sample_data(HG))
bcdist_HG <- phyloseq::distance(HG, method="bray", normalized=TRUE)
adonis2(bcdist_HG ~ New_Diet, data = sampledf_HG, permutations = 9999)

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = bcdist_HG ~ New_Diet, data = sampledf_HG, permutations = 9999)
##          Df SumOfSqs      R2      F Pr(>F)  
## New_Diet  1  0.20784 0.09337 3.3984  0.015 *
## Residual 33  2.01818 0.90663                
## Total    34  2.22601 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

sampledf_PC <- data.frame(sample_data(PC))
bcdist_PC <- phyloseq::distance(PC, method="bray",normalized=TRUE)
adonis2(bcdist_PC ~ New_Diet, data = sampledf_PC, permutations = 9999)

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = bcdist_PC ~ New_Diet, data = sampledf_PC, permutations = 9999)
##          Df SumOfSqs      R2      F Pr(>F)   
## New_Diet  1   0.4259 0.08735 3.0627 0.0036 **
## Residual 32   4.4501 0.91265                 
## Total    33   4.8760 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Hind gut and Pyloric caeca comparison based on control samples

HG_PC <- subset_samples(all.clean, New_Diet != "4%MN3")
HG_PC <- prune_taxa(taxa_sums(HG_PC) > 0, HG_PC)



# Estimate richness of all.clean
shannon.div <- estimate_richness(HG_PC, measures = c("Shannon", "Observed","Simpson" ))

# Get sample data
sampledata1<- data.frame(sample_data(HG_PC))

# Rename row names
row.names(shannon.div) <- gsub("X","", row.names(shannon.div))
row.names(shannon.div) <- gsub("[.]","-", row.names(shannon.div))

# Merge data
sampleData <- merge(sampledata1, shannon.div, by = 0 , all = TRUE)

# Create Observed Richness Plot
q1 <- ggboxplot(sampleData, x = "sampleType", y = "Observed",
                color = "sampleType", palette = "jco", legend = "none") + 
  stat_compare_means(label.y = 260) +
  geom_jitter(aes(colour = sampleType), size = 2, alpha = 0.6) +
  geom_boxplot(aes(fill = sampleType), width=0.7, alpha = 0.5) +
  theme_bw() +  theme(legend.position="none",axis.title.x=element_blank()) +  
  scale_fill_manual(values = cols) + 
  scale_colour_manual( values = cols)



# Calculate PCoA on Bray distance
PCoA_bray_HG_PC <- ordinate(physeq = HG_PC, method = "PCoA", distance = "bray")

# Plot PCoA on Bray distance
PCoA_bray_HG_PC_plot <- plot_ordination(
  physeq = HG_PC, 
  ordination = PCoA_bray_HG_PC, 
  color = "sampleType"
) + 
  geom_point(shape = 19, alpha=0.7) + 
  theme_bw() + ggtitle("PCoA Plot - Bray") +
    theme(plot.title = element_text(hjust = 0.5))  + 
  xlab("PCoA 1 [46.7 %]") + ylab("PCoA 2 [15.7 %]") + 
  stat_ellipse() + scale_fill_manual(values = cols) + 
  scale_colour_manual( values = cols)

# Run adonis test
sampledf_PC <- data.frame(sample_data(HG_PC))
bcdist_PC <- phyloseq::distance(HG_PC, method="bray",normalized=TRUE)
result <- adonis2(bcdist_PC ~ sampleType, data = sampledf_PC, permutations = 9999)
PCoA_bray_HG_PC_plot <- PCoA_bray_HG_PC_plot + annotate(
  "text", x = 0.35, y = 0.3, 
  label = paste("Adonis R2 =", round(result$R2[1], 3),
                "\np-value =", result$`Pr(>F)`[1]),
  col = "black", fontface = "bold")

plot_grid(q1, PCoA_bray_HG_PC_plot, labels = c('A', 'B'), align = 'hv', ncol = 2, rel_widths = c(0.6, 1))

library(tidyverse) 
library(pheatmap) 

df1 <- rabutable(phylo_ob = all.clean, predictor= "New_Diet", type = "Genus", facet_wrap   ="sampleType", N_taxa = 20)

data = df1 %>%   
  mutate(ID = paste0(variable, ":", wrap, "_", predictor2)) %>%  
  group_by(ID) %>%   
  summarize(meanvalue = mean(log(value))) %>%   
  mutate(feedtime = sub('.*:', '', ID)) %>%  
  mutate(ID = sub(':.*', '', ID)) %>%   
  pivot_wider(names_from = feedtime, values_from = meanvalue)    

rabuplot_df_genus <- as.data.frame(data)
rownames(rabuplot_df_genus) <- data$ID 
rabuplot_df_genus <- rabuplot_df_genus[,-1] 

heatmap_colors <- colorRampPalette(c("#18b29f","#FFFFFF","#ac6721"), interpolate = "spline", space = "rgb")(51)
annotation_col <- 
  colnames(rabuplot_df_genus) %>% 
  as.data.frame() %>% 
  dplyr::rename(common = ".") %>% 
  tidyr::separate(col = common,  into = c("Time", "Diet"), remove = F, extra = "drop") %>% # If replicates are present, we drop them here
  dplyr::select(common,Time, Diet) %>%  
  tibble::column_to_rownames(var = "common")
colnames(annotation_col) <- c("Type" , "Diet")

annotation_col$Diet[1] <- "4%MN3"
annotation_col$Diet[3] <- "4%MN3"
annotation_col$Diet[4] <- "CTR"

annotation_col$Type[1] <- "Hindgut"
annotation_col$Type[2] <- "Hindgut"

annotation_col$Type[3] <- "Pyloric caeca"
annotation_col$Type[4] <- "Pyloric caeca"



annotation_colors <- list(
  Diet = c(`CTR` = "#e41a1cff", `4%MN3` = "#984ea3ff"),
    Type = c(`Hindgut` = "darkorange", `Pyloric caeca` = "darkblue"))



row.names(rabuplot_df_genus)[2] <- "BCP group"   
differential_heatmap_plot <- pheatmap(t(rabuplot_df_genus), 
         annotation_row = annotation_col,
         annotation_colors = annotation_colors,  
        color = heatmap_colors,
         scale = "row", 
         cluster_rows = F, 
         cluster_cols = T, 
         gaps_row = c(2),
         show_colnames = T,
        show_rownames = F)

5 Transcriptomics

# Replace this base URL with the raw content URL of your GitHub repository
base_url <- "https://raw.githubusercontent.com/shashank-KU/ImprovaFish-MDF-Effects/main/"

# Specify the paths to your files on GitHub
host_gut_rawCounts_url <- paste0(base_url, "Transcriptomics/high_dosage_trial/output.txt")
sample_info_url <- paste0(base_url, "Transcriptomics/high_dosage_trial/sample_info.txt")

# Read the files
omics_data_host <- read.table(url(host_gut_rawCounts_url),  header = T, row.names = 1)
sample_info <- read.table(url(sample_info_url),  header = T, row.names = 1)

colnames(omics_data_host) <- sub("\\..*", "", colnames(omics_data_host))

r1 <- row.names(sample_info)
omics_data_host <- omics_data_host[, r1]
#all(colnames(omics_data_host) == rownames(sample_info))

# Hindgut
sample_info_hindgut <- sample_info[sample_info$sampleType == "hindgut", ]
matching_cols <- intersect(colnames(omics_data_host), rownames(sample_info_hindgut))
omics_data_host_gut <- omics_data_host[, matching_cols]

df <- round(omics_data_host_gut) %>%
  # The next steps require a data frame and round() returns a matrix
  as.data.frame() %>%
  # Only keep rows that have total counts above the cutoff
  dplyr::filter(rowSums(.) >= 0)

all(colnames(df) == rownames(sample_info_hindgut))

## [1] TRUE

dds_gut <- DESeqDataSetFromMatrix(
  countData = df, # Our prepped data frame with counts
  colData = sample_info_hindgut, # Data frame with annotation for our samples
  design = ~New_Diet # Here we are not specifying a model
)

# dds_gut$group <- factor(paste0(dds_gut$New_Diet))
# design(dds_gut) <- ~ group-1

dds_gut <- DESeq(dds_gut, parallel = T)
#resultsNames(dds_gut)
res <- results(dds_gut)
res <- res[order(res$padj),]
table(res$padj < 0.05)

## 
## FALSE  TRUE 
## 45745     6

res_tbl <- as_tibble(res, rownames = "ENSEMBL") %>%
  filter(padj <0.05)%>%
  arrange(padj)

vsdata <- vst(dds_gut)
pcaPlot <- DESeq2::plotPCA(vsdata, intgroup = c("New_Diet"), ntop = nrow(dds_gut), returnData = TRUE) 
percentVar <- round(100 * attr(pcaPlot, "percentVar"))

plot12<- ggplot(pcaPlot, aes(PC1, PC2, color=New_Diet)) + 
  geom_point(aes(fill ="New_Diet" ),  size =2.5) + 
  geom_point(shape = 1, size = 2.5,colour = "black") +
  theme_bw() + ggtitle("Hindgut: Host transcriptomics") + 
  theme(plot.title = element_text(hjust = 0.5)) +
  xlab("PC 1 [19 %]") + ylab("PC 2 [9 %]") + 
  stat_ellipse() + 
  scale_fill_manual(values = c("#984EA3", "#E41A1C")) + 
  scale_colour_manual( values = c("#984EA3", "#E41A1C"))+
  guides(color = guide_legend(title = "Diet"))

6 Metatranscriptomics

# Specify the path to your RDS file on GitHub
base_url <- "https://raw.githubusercontent.com/shashank-KU/ImprovaFish-MDF-Effects/main/"

# Specify the paths to your files on GitHub
host_gut_rawCounts_url <- paste0(base_url, "Metatranscriptomics/high_dosage_trial/txi.kallisto.tsv")
sample_info_url <- paste0(base_url, "Metatranscriptomics/high_dosage_trial/sample_info.txt")

# Read the files
metatranscriptomics_data <- read.table(url(host_gut_rawCounts_url), header = T, row.names = 1, sep = "\t")
sample_info <- read.table(url(sample_info_url), header = T, row.names = 1, sep = "\t")


count_columns <- grep("counts\\.", names(metatranscriptomics_data), value = TRUE)
metatranscriptomics_data <- metatranscriptomics_data[count_columns]

colnames(metatranscriptomics_data) <- sub("counts.", "", colnames(metatranscriptomics_data))

r1 <- row.names(sample_info)
metatranscriptomics_data <- metatranscriptomics_data[, r1]
#all(colnames(metatranscriptomics_data) == rownames(sample_info))

# Hindgut
df <- round(metatranscriptomics_data) %>%
  # The next steps require a data frame and round() returns a matrix
  as.data.frame() %>%
  # Only keep rows that have total counts above the cutoff
  dplyr::filter(rowSums(.) >= 50)

all(colnames(df) == rownames(sample_info))

## [1] TRUE

dds_metaT_gut <- DESeqDataSetFromMatrix(
  countData = df, # Our prepped data frame with counts
  colData = sample_info, # Data frame with annotation for our samples
  design = ~New_Diet # Here we are not specifying a model
)

dds_metaT_gut <- DESeq(dds_metaT_gut, parallel = T)
#resultsNames(dds_metaT_gut)
res <- results(dds_metaT_gut)
table(res$padj < 0.05)

## 
## FALSE  TRUE 
## 15376     5

#head(results(dds_metaT_gut, tidy=TRUE)) #let's look at the results table
#summary(res) #summary of results
res <- res[order(res$padj),]
#head(res)
#table(res$padj < 0.05)
res_tbl <- as_tibble(res, rownames = "ENSEMBL") %>%
  filter(padj <0.05)%>%
  arrange(padj)

vsdata <- vst(dds_metaT_gut)
#plotPCA(vsdata, intgroup="New_Diet")  + stat_ellipse() + theme_bw()
pcaPlot <- DESeq2::plotPCA(vsdata, intgroup = c("New_Diet"), ntop = nrow(dds_metaT_gut), returnData = TRUE) 

plot13 <- ggplot(pcaPlot, aes(PC1, PC2, color=New_Diet)) + 
  geom_point(aes(fill ="New_Diet" ),  size =2.5) + 
  geom_point(shape = 1, size = 2.5,colour = "black") +
  theme_bw() + ggtitle("Hindgut: Metatranscriptomics") + 
  theme(plot.title = element_text(hjust = 0.5)) +
  xlab("PC 1 [11 %]") + ylab("PC 2 [4 %]") + 
  stat_ellipse()  + 
  scale_fill_manual(values = c("#984EA3", "#E41A1C")) + 
  scale_colour_manual( values = c("#984EA3", "#E41A1C"))+
  guides(color = guide_legend(title = "Diet"))

left_pannel <- plot_grid(up_row, NULL, differential_heatmap_plot$gtable, align = 'hv', ncol = 1, rel_heights =  c(.6,0.1, 1), labels = c('', "G"))

right_pannel <- plot_grid(PCoA_bray_HG_plot, PCoA_bray_PC_plot, plot12, plot13,  align = "hv", ncol = 1, rel_widths = c(1,1,1,1), labels = c('D','E','F','H'))

plot_grid(left_pannel, right_pannel, align = "hv", rel_widths = c(1.2,1))

High inclusion of beta-mannnan trial

Shashank Gupta

February 09, 2024