Step 1. Package dependencies in R

Installing R packages can be done through various sources such as GitHub, the Comprehensive R Archive Network (CRAN), or by following the official website of the package.

To install a package from GitHub, use the devtools package and the install_github() function. For example, to install the “ggplot2” package from GitHub, run the following code:

library(devtools)
install_github("ggplot2")

To install a package from CRAN, use the install.packages() function. For example, to install the “dplyr” package from CRAN, run the following code:

install.packages("dplyr")

Finally, to install a package from its official website, download the package source code, and use the install.packages() function with the local file path as the argument. For example, to install the “reshape2” package from its official website, first download the source code, then run the following code:

install.packages("path/to/reshape2_package.tar.gz", repos = NULL, type = "source")

It is recommended to regularly update the installed packages to ensure compatibility and to benefit from new features and bug fixes.

Step 2. Downstream analysis with R

We require several packages-

Load packages

library("ranacapa")
library("phyloseq")
library("tidyverse")
library("plyr")
library("reshape2")
library("reshape")
library("tidyr")
library("doBy")
library("plyr")
library("microbiome")
library("ggpubr")
library("vegan")
library("tidyverse")
library("magrittr")
library("cowplot")
library("dendextend")
library("metagenomeSeq")
library("decontam")
library("RColorBrewer")
library("ampvis2")

import data and clean the taxonomy

# Load data
raw <- import_biom("/Users/shashankgupta/Desktop/ImprovAFish/ImprovAFish_4%/ImprovaFish_4_percen/exported-feature-table/feature-table_taxonomy.biom")
tree <- read_tree("/Users/shashankgupta/Desktop/ImprovAFish/ImprovAFish_4%/ImprovaFish_4_percen/exported-feature-table/tree.nwk")
refseq <- Biostrings::readDNAStringSet("/Users/shashankgupta/Desktop/ImprovAFish/ImprovAFish_4%/ImprovaFish_4_percen/exported-feature-table/dna-sequences.fasta", use.names = TRUE)
dat <- read.table("/Users/shashankgupta/Desktop/ImprovAFish/ImprovAFish_4%/ImprovaFish_4_percen/metadata.txt", header = TRUE,row.names = 1, sep = "\t")
# Merge into one complete phyloseq object
all <- merge_phyloseq(raw, sample_data(dat), tree, refseq)
tax <- data.frame(tax_table(all), stringsAsFactors = FALSE)
tax <- tax[,1:7] # No info in col 8-15
# Set informative colnames
colnames(tax) <- c("Kingdom", "Phylum","Class","Order","Family","Genus", "Species")
library(stringr)
tax.clean <- data.frame(row.names = row.names(tax),
                        Kingdom = str_replace(tax[,1], "d__",""), 
                        Phylum = str_replace(tax[,2], "p__",""),
                        Class = str_replace(tax[,3], "c__",""),
                        Order = str_replace(tax[,4], "o__",""),
                        Family = str_replace(tax[,5], "f__",""),
                        Genus = str_replace(tax[,6], "g__",""),
                        Species = str_replace(tax[,7], "s__",""), 
                        stringsAsFactors = FALSE)
tax.clean[is.na(tax.clean)] <- ""

# Remove remove ".", change "-" and " " to "_"
for (i in 1:ncol(tax.clean)){
  tax.clean[,i] <- str_replace_all(tax.clean[,i], "[.]","")
  tax.clean[,i] <- str_replace_all(tax.clean[,i], "[(]","")
  tax.clean[,i] <- str_replace_all(tax.clean[,i], "[)]","")
  tax.clean[,i] <- str_replace_all(tax.clean[,i], "-","_")
  tax.clean[,i] <- str_replace_all(tax.clean[,i], " ","_")
}

for (i in 1:7){ tax.clean[,i] <- as.character(tax.clean[,i])}
# File holes in the tax table
for (i in 1:nrow(tax.clean)){
  #  Fill in missing taxonomy
  if (tax.clean[i,2] == ""){
    kingdom <- paste("Kingdom_", tax.clean[i,1], sep = "")
    tax.clean[i, 2:7] <- kingdom
  } else if (tax.clean[i,3] == ""){
    phylum <- paste("Phylum_", tax.clean[i,2], sep = "")
    tax.clean[i, 3:7] <- phylum
  } else if (tax.clean[i,4] == ""){
    class <- paste("Class_", tax.clean[i,3], sep = "")
    tax.clean[i, 4:7] <- class
  } else if (tax.clean[i,5] == ""){
    order <- paste("Order_", tax.clean[i,4], sep = "")
    tax.clean[i, 5:7] <- order
  } else if (tax.clean[i,6] == ""){
    family <- paste("Family_", tax.clean[i,5], sep = "")
    tax.clean[i, 6:7] <- family
  } else if (tax.clean[i,7] == ""){
    tax.clean$Species[i] <- paste("Genus_",tax.clean$Genus[i], sep = "_")
  }
}

tax_table(all) <- as.matrix(tax.clean)
all
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 5203 taxa and 115 samples ]
## sample_data() Sample Data:       [ 115 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 5203 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 5203 tips and 5192 internal nodes ]
## refseq()      DNAStringSet:      [ 5203 reference sequences ]
all.clean <- subset_taxa(all, Kingdom != "Unassigned")
all.clean <- prune_taxa(taxa_sums(all.clean) > 0, all.clean)
all.clean <- subset_samples(all.clean, diet != "test")
all.clean <- prune_taxa(taxa_sums(all.clean) > 0, all.clean)
all.clean
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 5193 taxa and 113 samples ]
## sample_data() Sample Data:       [ 113 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 5193 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 5193 tips and 5182 internal nodes ]
## refseq()      DNAStringSet:      [ 5193 reference sequences ]

Rarefaction plot

This R code is using the “ggrare” function to create a rarefaction plot from the data in the “all.clean” object.

p <- ggrare(all.clean, step = 1000, 
            color = "New_Diet", 
            label = "sampleType", se = F,
            parallel = TRUE,
            plot = FALSE)
cols  <- c(brewer.pal(8,"Set1"), brewer.pal(7,"Dark2"),brewer.pal(7,"Set2"),brewer.pal(12,"Set3"),brewer.pal(7,"Accent"),brewer.pal(12,"Paired"),"gray")

p <- p + theme_bw() + 
  scale_fill_manual(values =cols) +
  scale_colour_manual( values = cols) +
  facet_wrap(~New_Diet)
p

Alpha diversity

Alpha diversity is a measure of the diversity of species within a given area or sample. It can be measured in two different ways: Shannon diversity, which takes into account both the richness and evenness of species in a given sample, or observed richness, which simply counts the total number of species present. Shannon diversity is often used to compare the diversity of different samples, whereas observed richness can be used to compare the diversity of different areas.

# Estimate richness of all.clean
shannon.div <- estimate_richness(all.clean, measures = c("Shannon", "Simpson", "Observed","Chao1"))

# Get sample data
sampledata1<- data.frame(sample_data(all.clean))

# Rename row names
row.names(shannon.div) <- gsub("X","", row.names(shannon.div))
row.names(shannon.div) <- gsub("[.]","-", row.names(shannon.div))


# Merge data
sampleData <- merge(sampledata1, shannon.div, by = 0 , all = TRUE)

# Factorize New_Diet
sampleData$New_Diet <- factor(sampleData$New_Diet, levels=c( 'Control', '1%_betamannan', '4%_betamannan'))

# List of comparisons
my_comparisons <- list( c("Control", "1%_betamannan"), 
                        c("Control", "4%_betamannan"), 
                        c("1%_betamannan", "4%_betamannan"))

# Create Observed Richness Plot
p1 <- ggboxplot(sampleData, x = "New_Diet", y = "Observed",
                color = "New_Diet", palette = "jco", legend = "none") + 
  stat_compare_means(comparisons = my_comparisons) +
  stat_compare_means(label.y = 400) +
  geom_jitter(aes(colour = New_Diet), size = 2, alpha = 0.6) +
  geom_boxplot(aes(fill = New_Diet), width=0.7, alpha = 0.5) +
  theme_bw() +  theme(legend.position="none",axis.title.x=element_blank()) +  
  scale_fill_manual(values = cols) + 
  scale_colour_manual( values = cols) +
  facet_wrap("sampleType")
## [1] FALSE
# Create Shannon Plot
p2 <- ggboxplot(sampleData, x = "New_Diet", y = "Shannon",
          color = "New_Diet", palette = "jco", legend = "none") + 
  stat_compare_means(comparisons = my_comparisons) +
  stat_compare_means(label.y = 5) +
  geom_jitter(aes(colour = New_Diet), size = 2, alpha = 0.6) +
  geom_boxplot(aes(fill = New_Diet), width=0.7, alpha = 0.5) +
  theme_bw() +  theme(legend.position="none",axis.title.x=element_blank()) +
  facet_wrap("sampleType")
## [1] FALSE

Beta diversity

Beta diversity is a term used to refer to the differences in species composition between two different sites or habitats. It is often used to measure how different species are distributed across a landscape. The concept of beta diversity was first proposed by ecologist Robert H. Whittaker in 1960. The measure is used to quantify the variation in species composition between two different sites, such as an island and the mainland. The most commonly used measure for beta diversity is the Bray-Curtis index, which looks at the ratio of shared species between two sites. This index is used to measure the differences in species composition between areas and to identify the importance of certain areas in terms of species diversity.

# Calculate PCoA on Bray distance
PCoA_bray <- ordinate(physeq = all.clean, method = "PCoA", distance = "bray")

# Plot PCoA on Bray distance
PCoA_bray_plot <- plot_ordination(
  physeq = all.clean, 
  ordination = PCoA_bray, 
  color = "New_Diet"
) + 
  geom_point(shape = 19, alpha=0.7) + 
  theme_bw() + ggtitle("PCoA Plot - Bray") + 
  xlab("PCoA 1 [43.5 %]") + ylab("PCoA 2 [27.5 %]") + 
  stat_ellipse() + scale_fill_manual(values = cols) + 
  scale_colour_manual( values = cols) + 
  facet_wrap("sampleType")

# Grid plot of PCoA plots
bottom_row <- plot_grid(p1, p2, labels = c('A', 'B'), align = 'h', rel_widths = c(1, 1))
plot_grid(bottom_row, PCoA_bray_plot, labels = c('', 'C'), ncol = 1)

Taxonomic classification

Phylum level taxonomic distribution. Bars report the mean abundance for each individual sample.

psdata.r<- transform_sample_counts(all.clean, function(x) x / sum(x) )
Final.RNA <- aggregate_rare(psdata.r, level = "Phylum", detection = 1/100, prevalence = 20/100)
getPalette = colorRampPalette(brewer.pal(10, "Dark2")) 
PhylaPalette = getPalette(10)

Final.RNA_phylum_plot<- plot_composition(Final.RNA, sample.sort = "Proteobacteria",otu.sort = "abundance", verbose = TRUE)
Final.RNA_phylum_plot <- Final.RNA_phylum_plot + 
  theme_bw() + 
  theme(axis.text.x=element_blank(), axis.ticks.x=element_blank()) +
  scale_fill_manual(values = PhylaPalette)

Final.RNA_phylum_plot

#Bacterial Community Composition for Manuscript
Final.seq.melt.RNA <- psmelt(tax_glom(psdata.r, "Species"))
tax_ranks <- c("Phylum", "Class", "Order", "Family", "Genus", "Species")

for (rank in tax_ranks) {
  n_unique <- length(unique(Final.seq.melt.RNA[[rank]]))
  message(paste(rank, ": ", n_unique, sep = ""))
}
## Phylum: 52
## Class: 117
## Order: 245
## Family: 409
## Genus: 850
## Species: 988

Differenital abundance

Relative abundances in the salmon samples. Comparison among the 20 most abundant bacterial genera. Relative abundance of each genus is shown with respect to different dosage of diet compared to control samples, and stratified by sample type. P-values correspond to Wilcoxon rank-sum tests of the relative abundances, with significant values (P < 0.05) bolded. A pseudocount (+1e−06) was added to all abundances for the log-scale presentation. The black dots indicate median values and the abundances are colored according to the diet.

p3 <- rabuplot(phylo_ob = psdata.r, predictor= "New_Diet", type = "Genus", facet_wrap   ="sampleType")
knitr::include_graphics("Images/plot3.png")