Load all the packages
library("ranacapa")
library("phyloseq")
library("ggplot2")
library("stringr")
library("plyr")
library("reshape2")
library("reshape")
library("dplyr")
library("tidyr")
library("doBy")
library("plyr")
library("microbiome")
library("ggpubr")
library("vegan")
library("tidyverse")
library("magrittr")
library("cowplot")
library("dendextend")
library("WGCNA")
library("metagenomeSeq")
library("decontam")
library("RColorBrewer")
library("ampvis2")
library("ggpubr")
library("formatR")
library("DT")
library("taxcleanr") #devtools::install_github("shashank-KU/taxcleanr")
library("pheatmap")
library("DESeq2")
library("microbiomeMarker")
# Specify the path to your CSV file on GitHub
csv_file <- "Phenotypic/Metrics_high_dosage.csv"
csv_url <- paste0("https://raw.githubusercontent.com/shashank-KU/ImprovaFish-MDF-Effects/main/", csv_file)
# Read the CSV file
fish_data <- read.csv(url(csv_url), row.names = 1)
fish_data$Diet <- factor(fish_data$Diet, levels=c('CTR', '4%MN3'))
annotation_colors <- list(
Diet = c(`CTR`="#e41a1cff", `4%MN3`="#984EA3"))
plot_2A <- ggboxplot(fish_data, x = "Diet", y = "Body.weight",
color = "black", palette = "jco", legend = "none", outlier.shape = NA)+
#stat_compare_means(comparisons = my_comparisons, hide.ns = F) +
stat_compare_means(label.y = 370) +
ggplot2::geom_jitter(
mapping = aes_string(fill="Diet"),
position = ggplot2::position_jitter(seed=123),
shape = 21,
color = "black",
size = 2,
alpha = 0.8
) +
theme_bw() + ylab("Body weight") +
theme(legend.position="none",axis.title.x=element_blank()) +
scale_fill_manual(values = annotation_colors$Diet) +
scale_colour_manual( values = annotation_colors$Diet)
# fish_data %>%
# complete(Timepoint, Diet, fill = list(BW = NA, Length = NA)) %>%
# group_by(Timepoint, Diet) %>%
# summarise(across(c(Heart_Score, Fat_Score,cataract_V,cataract_H ), mean, na.rm = TRUE), .groups = 'drop') %>%
# mutate(across(where(is.numeric), round, 2)) %>%
# DT::datatable()
library(dplyr)
library(tidyr)
fish_data %>%
complete(Diet, fill = list(Body.weight = NA, Length = NA, GW = NA, CF = NA, HSI = NA, CSI = NA, Heart_W= NA,Liver_W = NA)) %>%
group_by(Diet) %>%
summarise(
Mean_BW = mean(Body.weight, na.rm = TRUE),
Mean_Length = mean(Length, na.rm = TRUE),
Mean_GW = mean(GW, na.rm = TRUE),
Mean_CF = mean(CF, na.rm = TRUE),
Mean_HSI = mean(HSI, na.rm = TRUE),
Mean_CSI = mean(CSI, na.rm = TRUE),
Mean_Heart_W = mean(Heart_W, na.rm = TRUE),
Mean_Liver_W = mean(Liver_W, na.rm = TRUE),
SEM_BW = sd(Body.weight, na.rm = TRUE) / sqrt(sum(!is.na(Body.weight))),
SEM_Length = sd(Length, na.rm = TRUE) / sqrt(sum(!is.na(Length))),
SEM_GW = sd(GW, na.rm = TRUE) / sqrt(sum(!is.na(GW))),
SEM_CF = sd(CF, na.rm = TRUE) / sqrt(sum(!is.na(CF))),
SEM_HSI = sd(HSI, na.rm = TRUE) / sqrt(sum(!is.na(HSI))),
SEM_CSI = sd(CSI, na.rm = TRUE) / sqrt(sum(!is.na(CSI))),
SEM_Heart_W = sd(Heart_W, na.rm = TRUE) / sqrt(sum(!is.na(Heart_W))),
SEM_Liver_W = sd(Liver_W, na.rm = TRUE) / sqrt(sum(!is.na(Liver_W))),
.groups = 'drop'
) %>%
mutate(
across(where(is.numeric), ~round(., 2)),
across(starts_with("Mean_"), ~paste(., "±", get(paste0("SEM_", sub("Mean_", "", cur_column())))))
) %>%
filter(complete.cases(.)) %>%
select(Diet, starts_with("Mean_"), -starts_with("SEM_")) %>%
rename_with(~sub("Mean_", "", .), starts_with("Mean_")) %>%
DT::datatable()
Import data and clean the taxonomy
# Load data
base_url <- "https://raw.githubusercontent.com/shashank-KU/ImprovaFish-MDF-Effects/main/"
# Specify the path to your RDS file on GitHub
rds_file <- "Metagenomics/high_dosage_trial/metagenomics_high_MDF.rdata"
rds_url <- paste0(base_url, rds_file)
# Read the RDS file
all <- readRDS(url(rds_url))
tax <- data.frame(tax_table(all), stringsAsFactors = FALSE)
tax <- tax[,1:7] # No info in col 8-15
# Set informative colnames
colnames(tax) <- c("Kingdom", "Phylum","Class","Order","Family","Genus", "Species")
library(stringr)
tax.clean <- data.frame(row.names = row.names(tax),
Kingdom = str_replace(tax[,1], "d__",""),
Phylum = str_replace(tax[,2], "p__",""),
Class = str_replace(tax[,3], "c__",""),
Order = str_replace(tax[,4], "o__",""),
Family = str_replace(tax[,5], "f__",""),
Genus = str_replace(tax[,6], "g__",""),
Species = str_replace(tax[,7], "s__",""),
stringsAsFactors = FALSE)
tax.clean[is.na(tax.clean)] <- ""
tax.clean[tax.clean$Phylum %in% c("uncultured"),2:8] <- ""
out <- c("uncultured")
tax.clean[tax.clean$Class %in% out, 3:8] <- ""
tax.clean[tax.clean$Order %in% out, 4:8] <- ""
tax.clean[tax.clean$Family %in% out, 5:8] <- ""
tax.clean[tax.clean$Genus %in% out, 6:8] <- ""
tax.clean[tax.clean$Species %in% out, 7:8] <- ""
# Remove remove ".", change "-" and " " to "_"
for (i in 1:ncol(tax.clean)){
tax.clean[,i] <- str_replace_all(tax.clean[,i], "[.]","")
tax.clean[,i] <- str_replace_all(tax.clean[,i], "[(]","")
tax.clean[,i] <- str_replace_all(tax.clean[,i], "[)]","")
tax.clean[,i] <- str_replace_all(tax.clean[,i], "-","_")
tax.clean[,i] <- str_replace_all(tax.clean[,i], " ","_")
}
for (i in 1:7){ tax.clean[,i] <- as.character(tax.clean[,i])}
# File holes in the tax table
for (i in 1:nrow(tax.clean)){
# Fill in missing taxonomy
if (tax.clean[i,2] == ""){
kingdom <- paste("Kingdom_", tax.clean[i,1], sep = "")
tax.clean[i, 2:7] <- kingdom
} else if (tax.clean[i,3] == ""){
phylum <- paste("Phylum_", tax.clean[i,2], sep = "")
tax.clean[i, 3:7] <- phylum
} else if (tax.clean[i,4] == ""){
class <- paste("Class_", tax.clean[i,3], sep = "")
tax.clean[i, 4:7] <- class
} else if (tax.clean[i,5] == ""){
order <- paste("Order_", tax.clean[i,4], sep = "")
tax.clean[i, 5:7] <- order
} else if (tax.clean[i,6] == ""){
family <- paste("Family_", tax.clean[i,5], sep = "")
tax.clean[i, 6:7] <- family
} else if (tax.clean[i,7] == ""){
tax.clean$Species[i] <- paste("Genus_",tax.clean$Genus[i], sep = "_")
}
}
tax_table(all) <- as.matrix(tax.clean)
all.clean <- subset_taxa(all,
Kingdom != "Unassigned" &
Kingdom != "Archaea" &
Kingdom != "Eukaryota" &
Family != "Mitochondria" &
Order != "Chloroplast" &
Genus != "Ralstonia"
) %>%
prune_taxa(taxa_sums(.) > 0, .) %>%
subset_samples(diet != "test") %>%
prune_taxa(taxa_sums(.) > 0, .) %>%
subset_samples(New_Diet != "1%_betamannan") %>%
prune_taxa(taxa_sums(.) > 0, .) %>%
subset_samples(sampleType != "feed") %>%
prune_taxa(taxa_sums(.) > 0, .)
sample_data(all.clean)$New_Diet <- ifelse(sample_data(all.clean)$New_Diet == "Control", "CTR", ifelse(sample_data(all.clean)$New_Diet == "4%_betamannan", "4%MN3", sample_data(all.clean)$New_Diet))
sample_data(all.clean)$New_Diet <- factor(sample_data(all.clean)$New_Diet, levels=c('CTR', '4%MN3'))
all.clean
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 3505 taxa and 69 samples ]
## sample_data() Sample Data: [ 69 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 3505 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 3505 tips and 3497 internal nodes ]
## refseq() DNAStringSet: [ 3505 reference sequences ]
tax_table(all.clean) <- tax_table(all.clean)[, c(1:7)]
sample_data(all.clean)$Group <- paste(sample_data(all.clean)$sampleType,sample_data(all.clean)$New_Diet,sep = "_")
psdata.p <- prune_taxa(taxa_sums(all.clean) >0, all.clean)
psdata.p
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 3505 taxa and 69 samples ]
## sample_data() Sample Data: [ 69 samples by 13 sample variables ]
## tax_table() Taxonomy Table: [ 3505 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 3505 tips and 3497 internal nodes ]
## refseq() DNAStringSet: [ 3505 reference sequences ]
Rarefaction plot- This R code is using the “ggrare” function to create a rarefaction plot from the data in the “all.clean” object.
annotation_colors <- list(
#samplingTime = c(`T0`="gray" ,`T1` = "#FF7F00", `T2` = "#FFD92F", `T3`="#F781BF"),
New_Diet = c(`CTR`="#e41a1cff", `4%MN3`="#984EA3"))
p <- ggrare(all.clean, step = 1000,
color = "New_Diet",
se = F,
parallel = TRUE,
plot = F)
p + theme_bw() +
scale_fill_manual(values =annotation_colors$New_Diet) +
scale_colour_manual( values = annotation_colors$New_Diet)
Taxonomic classification- Phylum level taxonomic distribution. Bars report the mean abundance for each individual sample.
HG <- subset_samples(all.clean, sampleType == "Hindgut")
HG <- prune_taxa(taxa_sums(HG) > 0, HG)
PC <- subset_samples(all.clean, sampleType == "Pyloric_caeca")
PC <- prune_taxa(taxa_sums(PC) > 0, PC)
HG_T<- transform_sample_counts(HG, function(x) x / sum(x) )
HG.Final.RNA <- aggregate_rare(HG_T, level = "Phylum", detection = 1/100, prevalence = 10/100)
getPalette = colorRampPalette(brewer.pal(10, "Dark2"))
PhylaPalette = getPalette(10)
cols <- c(brewer.pal(8,"Set1"), brewer.pal(7,"Dark2"),brewer.pal(7,"Set2"),brewer.pal(12,"Set3"),brewer.pal(7,"Accent"),brewer.pal(12,"Paired"),"gray")
HG.Final.RNA_phylum_plot<- plot_composition(HG.Final.RNA, sample.sort = "Proteobacteria",otu.sort = "abundance", verbose = TRUE, group_by = "New_Diet")
HG.Final.RNA_phylum_plot <- HG.Final.RNA_phylum_plot +
theme_bw() +
#theme(axis.text.x=element_blank(), axis.ticks.x=element_blank()) +
scale_fill_manual(values = cols) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
PC_T<- transform_sample_counts(PC, function(x) x / sum(x) )
PC.Final.RNA <- aggregate_rare(PC_T, level = "Phylum", detection = 1/100, prevalence = 10/100)
getPalette = colorRampPalette(brewer.pal(16, "Dark2"))
PhylaPalette = getPalette(16)
cols <- c(brewer.pal(8,"Set1"), brewer.pal(7,"Dark2"),brewer.pal(7,"Set2"),brewer.pal(12,"Set3"),brewer.pal(7,"Accent"),brewer.pal(12,"Paired"),"gray")
PC.Final.RNA_phylum_plot<- plot_composition(PC.Final.RNA, sample.sort = "Proteobacteria",otu.sort = "abundance", verbose = TRUE, group_by = "New_Diet")
PC.Final.RNA_phylum_plot <- PC.Final.RNA_phylum_plot +
theme_bw() +
#theme(axis.text.x=element_blank(), axis.ticks.x=element_blank()) +
scale_fill_manual(values = cols) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
plot_grid(HG.Final.RNA_phylum_plot, PC.Final.RNA_phylum_plot, ncol = 1, align = "hv")
#Bacterial Community Composition for Manuscript
Final.seq.melt.RNA <- psmelt(tax_glom(all.clean, "Species"))
tax_ranks <- c("Phylum", "Class", "Order", "Family", "Genus", "Species")
for (rank in tax_ranks) {
n_unique <- length(unique(Final.seq.melt.RNA[[rank]]))
message(paste(rank, ": ", n_unique, sep = ""))
}
Alpha diversity- Alpha diversity is a measure of the diversity of species within a given area or sample. It can be measured in two different ways: Shannon diversity, which takes into account both the richness and evenness of species in a given sample, or observed richness, which simply counts the total number of species present. Shannon diversity is often used to compare the diversity of different samples, whereas observed richness can be used to compare the diversity of different areas.
# Estimate richness of all.clean
shannon.div <- estimate_richness(all.clean, measures = c("Shannon", "Observed"))
# Get sample data
sampledata1<- data.frame(sample_data(all.clean))
# Rename row names
row.names(shannon.div) <- gsub("X","", row.names(shannon.div))
row.names(shannon.div) <- gsub("[.]","-", row.names(shannon.div))
# Merge data
sampleData <- merge(sampledata1, shannon.div, by = 0 , all = TRUE)
# Create Shannon Plot
plot11 <- ggboxplot(sampleData, x = "New_Diet", y = "Shannon",
color = "black", palette = "jco", legend = "none", outlier.shape = NA)+
# stat_compare_means(comparisons = my_comparisons)+ # Add pairwise comparisons p-value
stat_compare_means(label.y = 4.3) +
ggplot2::geom_jitter(
mapping = aes_string(fill="New_Diet"),
position = ggplot2::position_jitter(seed=123),
shape = 21,
color = "black",
size = 2,
alpha = 0.8
) +
theme_bw() + labs(y= "Shannon index") +
theme(legend.position="none",axis.title.x=element_blank()) +
scale_fill_manual(values = c( "#E41A1C", "#984EA3")) +
scale_colour_manual( values = c( "#E41A1C", "#984EA3")) +
facet_wrap(. ~ sampleType, ncol = 2) +
theme(panel.spacing = unit(1, "cm"))
Beta diversity- Beta diversity is a term used to refer to the differences in species composition between two different sites or habitats. It is often used to measure how different species are distributed across a landscape. The concept of beta diversity was first proposed by ecologist Robert H. Whittaker in 1960. The measure is used to quantify the variation in species composition between two different sites, such as an island and the mainland. The most commonly used measure for beta diversity is the Bray-Curtis index, which looks at the ratio of shared species between two sites. This index is used to measure the differences in species composition between areas and to identify the importance of certain areas in terms of species diversity.
# Calculate PCoA on Bray distance
PCoA_bray_HG <- ordinate(physeq = HG, method = "PCoA", distance = "bray")
# Plot PCoA on Bray distance
PCoA_bray_HG_plot <- plot_ordination(
physeq = HG,
ordination = PCoA_bray_HG,
color = "New_Diet"
) +
geom_point(shape = 19, alpha=0.7) +
theme_bw() + ggtitle("Hindgut: Microbiome") +
theme(plot.title = element_text(hjust = 0.5)) +
xlab("PCoA 1 [47 %]") + ylab("PCoA 2 [9.3 %]") +
stat_ellipse() + scale_fill_manual(values = annotation_colors$New_Diet) +
scale_colour_manual( values = annotation_colors$New_Diet)+
guides(color = guide_legend(title = "Diet"))
# Calculate PCoA on Bray distance
PCoA_bray_PC <- ordinate(physeq = PC, method = "PCoA", distance = "bray")
# Plot PCoA on Bray distance
PCoA_bray_PC_plot <- plot_ordination(
physeq = PC,
ordination = PCoA_bray_PC,
color = "New_Diet"
) +
geom_point(shape = 19, alpha=0.7) +
theme_bw() + ggtitle("Pyloric caeca: Microbiome") +
theme(plot.title = element_text(hjust = 0.5)) +
xlab("PCoA 1 [29.2 %]") + ylab("PCoA 2 [15.4 %]") +
stat_ellipse() + scale_fill_manual(values = annotation_colors$New_Diet) +
scale_colour_manual( values = annotation_colors$New_Diet) +
guides(color = guide_legend(title = "Diet"))
# Grid plot of PCoA plots
up_row <- plot_grid(plot_2A, plot11, labels = c('B', 'C'), align = 'h', nrow = 1, rel_widths = c(0.5,1))
bottom_row <- plot_grid(PCoA_bray_HG_plot, PCoA_bray_PC_plot, labels = c('E', 'F'), align = 'h', ncol = 1)
#Bacterial Community Composition for Manuscript
Final.seq.melt.RNA <- psmelt(tax_glom(HG, "Species"))
tax_ranks <- c("Phylum", "Class", "Order", "Family", "Genus", "Species")
for (rank in tax_ranks) {
n_unique <- length(unique(Final.seq.melt.RNA[[rank]]))
message(paste(rank, ": ", n_unique, sep = ""))
}
#Bacterial Community Composition for Manuscript
Final.seq.melt.RNA.PC <- psmelt(tax_glom(PC, "Species"))
tax_ranks <- c("Phylum", "Class", "Order", "Family", "Genus", "Species")
for (rank in tax_ranks) {
n_unique <- length(unique(Final.seq.melt.RNA.PC[[rank]]))
message(paste(rank, ": ", n_unique, sep = ""))
}
library(doBy)
Final.seq.melt.RNA.PC <- psmelt(tax_glom(PC, "Species"))
table(grepl("Kingdom", unique(Final.seq.melt.RNA.PC$Phylum)))
##
## FALSE TRUE
## 32 1
##
## FALSE TRUE
## 67 2
##
## FALSE TRUE
## 147 10
##
## FALSE TRUE
## 232 29
##
## FALSE TRUE
## 414 96
##
## FALSE TRUE
## 1 509
Phylum_df <- summaryBy(Abundance~Phylum, data=Final.seq.melt.RNA.PC, FUN=sum)
Phylum_df$Percent <- round(Phylum_df$Abundance.sum/sum(Phylum_df$Abundance.sum)*100, 4)
Phylum_df <- plyr::arrange(Phylum_df, plyr::desc(Percent))
Phylum_df$PercentageRound <- round(Phylum_df$Percent, digits = 2)
DT::datatable(Phylum_df)
class_df <- summaryBy(Abundance~Phylum+Class, data=Final.seq.melt.RNA.PC, FUN=sum)
class_df$Percent <- round(class_df$Abundance.sum/sum(class_df$Abundance.sum)*100, 4)
class_df <- plyr::arrange(class_df, plyr::desc(Percent))
class_df$Round <- round(class_df$Percent, digits = 2)
DT::datatable(class_df)
genus_df <- summaryBy(Abundance~Genus, data=Final.seq.melt.RNA.PC, FUN=sum)
genus_df$Percent <- round(genus_df$Abundance.sum/sum(genus_df$Abundance.sum)*100, 4)
genus_df <- plyr::arrange(genus_df, plyr::desc(Percent))
genus_df$Round <- round(genus_df$Percent, digits = 2)
DT::datatable(genus_df)
library(doBy)
Final.seq.melt.RNA.HG <- psmelt(tax_glom(HG, "Species"))
table(grepl("Kingdom", unique(Final.seq.melt.RNA.HG$Phylum)))
##
## FALSE TRUE
## 35 1
##
## FALSE TRUE
## 92 2
##
## FALSE TRUE
## 194 12
##
## FALSE TRUE
## 313 32
##
## FALSE TRUE
## 567 116
##
## FALSE TRUE
## 1 682
Phylum_df <- summaryBy(Abundance~Phylum, data=Final.seq.melt.RNA.HG, FUN=sum)
Phylum_df$Percent <- round(Phylum_df$Abundance.sum/sum(Phylum_df$Abundance.sum)*100, 4)
Phylum_df <- plyr::arrange(Phylum_df, plyr::desc(Percent))
Phylum_df$PercentageRound <- round(Phylum_df$Percent, digits = 2)
DT::datatable(Phylum_df)
class_df <- summaryBy(Abundance~Phylum+Class, data=Final.seq.melt.RNA.HG, FUN=sum)
class_df$Percent <- round(class_df$Abundance.sum/sum(class_df$Abundance.sum)*100, 4)
class_df <- plyr::arrange(class_df, plyr::desc(Percent))
class_df$Round <- round(class_df$Percent, digits = 2)
DT::datatable(class_df)
genus_df <- summaryBy(Abundance~Genus, data=Final.seq.melt.RNA.HG, FUN=sum)
genus_df$Percent <- round(genus_df$Abundance.sum/sum(genus_df$Abundance.sum)*100, 4)
genus_df <- plyr::arrange(genus_df, plyr::desc(Percent))
genus_df$Round <- round(genus_df$Percent, digits = 2)
DT::datatable(genus_df)
sampledf_HG <- data.frame(sample_data(HG))
bcdist_HG <- phyloseq::distance(HG, method="bray", normalized=TRUE)
adonis2(bcdist_HG ~ New_Diet, data = sampledf_HG, permutations = 9999)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = bcdist_HG ~ New_Diet, data = sampledf_HG, permutations = 9999)
## Df SumOfSqs R2 F Pr(>F)
## New_Diet 1 0.20784 0.09337 3.3984 0.015 *
## Residual 33 2.01818 0.90663
## Total 34 2.22601 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sampledf_PC <- data.frame(sample_data(PC))
bcdist_PC <- phyloseq::distance(PC, method="bray",normalized=TRUE)
adonis2(bcdist_PC ~ New_Diet, data = sampledf_PC, permutations = 9999)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = bcdist_PC ~ New_Diet, data = sampledf_PC, permutations = 9999)
## Df SumOfSqs R2 F Pr(>F)
## New_Diet 1 0.4259 0.08735 3.0627 0.0036 **
## Residual 32 4.4501 0.91265
## Total 33 4.8760 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Hind gut and Pyloric caeca comparison based on control samples
HG_PC <- subset_samples(all.clean, New_Diet != "4%MN3")
HG_PC <- prune_taxa(taxa_sums(HG_PC) > 0, HG_PC)
# Estimate richness of all.clean
shannon.div <- estimate_richness(HG_PC, measures = c("Shannon", "Observed","Simpson" ))
# Get sample data
sampledata1<- data.frame(sample_data(HG_PC))
# Rename row names
row.names(shannon.div) <- gsub("X","", row.names(shannon.div))
row.names(shannon.div) <- gsub("[.]","-", row.names(shannon.div))
# Merge data
sampleData <- merge(sampledata1, shannon.div, by = 0 , all = TRUE)
# Create Observed Richness Plot
q1 <- ggboxplot(sampleData, x = "sampleType", y = "Observed",
color = "sampleType", palette = "jco", legend = "none") +
stat_compare_means(label.y = 260) +
geom_jitter(aes(colour = sampleType), size = 2, alpha = 0.6) +
geom_boxplot(aes(fill = sampleType), width=0.7, alpha = 0.5) +
theme_bw() + theme(legend.position="none",axis.title.x=element_blank()) +
scale_fill_manual(values = cols) +
scale_colour_manual( values = cols)
# Calculate PCoA on Bray distance
PCoA_bray_HG_PC <- ordinate(physeq = HG_PC, method = "PCoA", distance = "bray")
# Plot PCoA on Bray distance
PCoA_bray_HG_PC_plot <- plot_ordination(
physeq = HG_PC,
ordination = PCoA_bray_HG_PC,
color = "sampleType"
) +
geom_point(shape = 19, alpha=0.7) +
theme_bw() + ggtitle("PCoA Plot - Bray") +
theme(plot.title = element_text(hjust = 0.5)) +
xlab("PCoA 1 [46.7 %]") + ylab("PCoA 2 [15.7 %]") +
stat_ellipse() + scale_fill_manual(values = cols) +
scale_colour_manual( values = cols)
# Run adonis test
sampledf_PC <- data.frame(sample_data(HG_PC))
bcdist_PC <- phyloseq::distance(HG_PC, method="bray",normalized=TRUE)
result <- adonis2(bcdist_PC ~ sampleType, data = sampledf_PC, permutations = 9999)
PCoA_bray_HG_PC_plot <- PCoA_bray_HG_PC_plot + annotate(
"text", x = 0.35, y = 0.3,
label = paste("Adonis R2 =", round(result$R2[1], 3),
"\np-value =", result$`Pr(>F)`[1]),
col = "black", fontface = "bold")
plot_grid(q1, PCoA_bray_HG_PC_plot, labels = c('A', 'B'), align = 'hv', ncol = 2, rel_widths = c(0.6, 1))
library(tidyverse)
library(pheatmap)
df1 <- rabutable(phylo_ob = all.clean, predictor= "New_Diet", type = "Genus", facet_wrap ="sampleType", N_taxa = 20)
data = df1 %>%
mutate(ID = paste0(variable, ":", wrap, "_", predictor2)) %>%
group_by(ID) %>%
summarize(meanvalue = mean(log(value))) %>%
mutate(feedtime = sub('.*:', '', ID)) %>%
mutate(ID = sub(':.*', '', ID)) %>%
pivot_wider(names_from = feedtime, values_from = meanvalue)
rabuplot_df_genus <- as.data.frame(data)
rownames(rabuplot_df_genus) <- data$ID
rabuplot_df_genus <- rabuplot_df_genus[,-1]
heatmap_colors <- colorRampPalette(c("#18b29f","#FFFFFF","#ac6721"), interpolate = "spline", space = "rgb")(51)
annotation_col <-
colnames(rabuplot_df_genus) %>%
as.data.frame() %>%
dplyr::rename(common = ".") %>%
tidyr::separate(col = common, into = c("Time", "Diet"), remove = F, extra = "drop") %>% # If replicates are present, we drop them here
dplyr::select(common,Time, Diet) %>%
tibble::column_to_rownames(var = "common")
colnames(annotation_col) <- c("Type" , "Diet")
annotation_col$Diet[1] <- "4%MN3"
annotation_col$Diet[3] <- "4%MN3"
annotation_col$Diet[4] <- "CTR"
annotation_col$Type[1] <- "Hindgut"
annotation_col$Type[2] <- "Hindgut"
annotation_col$Type[3] <- "Pyloric caeca"
annotation_col$Type[4] <- "Pyloric caeca"
annotation_colors <- list(
Diet = c(`CTR` = "#e41a1cff", `4%MN3` = "#984ea3ff"),
Type = c(`Hindgut` = "darkorange", `Pyloric caeca` = "darkblue"))
row.names(rabuplot_df_genus)[2] <- "BCP group"
differential_heatmap_plot <- pheatmap(t(rabuplot_df_genus),
annotation_row = annotation_col,
annotation_colors = annotation_colors,
color = heatmap_colors,
scale = "row",
cluster_rows = F,
cluster_cols = T,
gaps_row = c(2),
show_colnames = T,
show_rownames = F)
# Replace this base URL with the raw content URL of your GitHub repository
base_url <- "https://raw.githubusercontent.com/shashank-KU/ImprovaFish-MDF-Effects/main/"
# Specify the paths to your files on GitHub
host_gut_rawCounts_url <- paste0(base_url, "Transcriptomics/high_dosage_trial/output.txt")
sample_info_url <- paste0(base_url, "Transcriptomics/high_dosage_trial/sample_info.txt")
# Read the files
omics_data_host <- read.table(url(host_gut_rawCounts_url), header = T, row.names = 1)
sample_info <- read.table(url(sample_info_url), header = T, row.names = 1)
colnames(omics_data_host) <- sub("\\..*", "", colnames(omics_data_host))
r1 <- row.names(sample_info)
omics_data_host <- omics_data_host[, r1]
#all(colnames(omics_data_host) == rownames(sample_info))
# Hindgut
sample_info_hindgut <- sample_info[sample_info$sampleType == "hindgut", ]
matching_cols <- intersect(colnames(omics_data_host), rownames(sample_info_hindgut))
omics_data_host_gut <- omics_data_host[, matching_cols]
df <- round(omics_data_host_gut) %>%
# The next steps require a data frame and round() returns a matrix
as.data.frame() %>%
# Only keep rows that have total counts above the cutoff
dplyr::filter(rowSums(.) >= 0)
all(colnames(df) == rownames(sample_info_hindgut))
## [1] TRUE
dds_gut <- DESeqDataSetFromMatrix(
countData = df, # Our prepped data frame with counts
colData = sample_info_hindgut, # Data frame with annotation for our samples
design = ~New_Diet # Here we are not specifying a model
)
# dds_gut$group <- factor(paste0(dds_gut$New_Diet))
# design(dds_gut) <- ~ group-1
dds_gut <- DESeq(dds_gut, parallel = T)
#resultsNames(dds_gut)
res <- results(dds_gut)
res <- res[order(res$padj),]
table(res$padj < 0.05)
##
## FALSE TRUE
## 45745 6
res_tbl <- as_tibble(res, rownames = "ENSEMBL") %>%
filter(padj <0.05)%>%
arrange(padj)
vsdata <- vst(dds_gut)
pcaPlot <- DESeq2::plotPCA(vsdata, intgroup = c("New_Diet"), ntop = nrow(dds_gut), returnData = TRUE)
percentVar <- round(100 * attr(pcaPlot, "percentVar"))
plot12<- ggplot(pcaPlot, aes(PC1, PC2, color=New_Diet)) +
geom_point(aes(fill ="New_Diet" ), size =2.5) +
geom_point(shape = 1, size = 2.5,colour = "black") +
theme_bw() + ggtitle("Hindgut: Host transcriptomics") +
theme(plot.title = element_text(hjust = 0.5)) +
xlab("PC 1 [19 %]") + ylab("PC 2 [9 %]") +
stat_ellipse() +
scale_fill_manual(values = c("#984EA3", "#E41A1C")) +
scale_colour_manual( values = c("#984EA3", "#E41A1C"))+
guides(color = guide_legend(title = "Diet"))
# Specify the path to your RDS file on GitHub
base_url <- "https://raw.githubusercontent.com/shashank-KU/ImprovaFish-MDF-Effects/main/"
# Specify the paths to your files on GitHub
host_gut_rawCounts_url <- paste0(base_url, "Metatranscriptomics/high_dosage_trial/txi.kallisto.tsv")
sample_info_url <- paste0(base_url, "Metatranscriptomics/high_dosage_trial/sample_info.txt")
# Read the files
metatranscriptomics_data <- read.table(url(host_gut_rawCounts_url), header = T, row.names = 1, sep = "\t")
sample_info <- read.table(url(sample_info_url), header = T, row.names = 1, sep = "\t")
count_columns <- grep("counts\\.", names(metatranscriptomics_data), value = TRUE)
metatranscriptomics_data <- metatranscriptomics_data[count_columns]
colnames(metatranscriptomics_data) <- sub("counts.", "", colnames(metatranscriptomics_data))
r1 <- row.names(sample_info)
metatranscriptomics_data <- metatranscriptomics_data[, r1]
#all(colnames(metatranscriptomics_data) == rownames(sample_info))
# Hindgut
df <- round(metatranscriptomics_data) %>%
# The next steps require a data frame and round() returns a matrix
as.data.frame() %>%
# Only keep rows that have total counts above the cutoff
dplyr::filter(rowSums(.) >= 50)
all(colnames(df) == rownames(sample_info))
## [1] TRUE
dds_metaT_gut <- DESeqDataSetFromMatrix(
countData = df, # Our prepped data frame with counts
colData = sample_info, # Data frame with annotation for our samples
design = ~New_Diet # Here we are not specifying a model
)
dds_metaT_gut <- DESeq(dds_metaT_gut, parallel = T)
#resultsNames(dds_metaT_gut)
res <- results(dds_metaT_gut)
table(res$padj < 0.05)
##
## FALSE TRUE
## 15376 5
#head(results(dds_metaT_gut, tidy=TRUE)) #let's look at the results table
#summary(res) #summary of results
res <- res[order(res$padj),]
#head(res)
#table(res$padj < 0.05)
res_tbl <- as_tibble(res, rownames = "ENSEMBL") %>%
filter(padj <0.05)%>%
arrange(padj)
vsdata <- vst(dds_metaT_gut)
#plotPCA(vsdata, intgroup="New_Diet") + stat_ellipse() + theme_bw()
pcaPlot <- DESeq2::plotPCA(vsdata, intgroup = c("New_Diet"), ntop = nrow(dds_metaT_gut), returnData = TRUE)
plot13 <- ggplot(pcaPlot, aes(PC1, PC2, color=New_Diet)) +
geom_point(aes(fill ="New_Diet" ), size =2.5) +
geom_point(shape = 1, size = 2.5,colour = "black") +
theme_bw() + ggtitle("Hindgut: Metatranscriptomics") +
theme(plot.title = element_text(hjust = 0.5)) +
xlab("PC 1 [11 %]") + ylab("PC 2 [4 %]") +
stat_ellipse() +
scale_fill_manual(values = c("#984EA3", "#E41A1C")) +
scale_colour_manual( values = c("#984EA3", "#E41A1C"))+
guides(color = guide_legend(title = "Diet"))
left_pannel <- plot_grid(up_row, NULL, differential_heatmap_plot$gtable, align = 'hv', ncol = 1, rel_heights = c(.6,0.1, 1), labels = c('', "G"))
right_pannel <- plot_grid(PCoA_bray_HG_plot, PCoA_bray_PC_plot, plot12, plot13, align = "hv", ncol = 1, rel_widths = c(1,1,1,1), labels = c('D','E','F','H'))
plot_grid(left_pannel, right_pannel, align = "hv", rel_widths = c(1.2,1))