16S-rDNA-V3-V4 repository: https://github.com/ycl6/16S-rDNA-V3-V4
phyloseq: GitHub, Documentation, Paper
GUniFrac: Paper
Demo Dataset: PRJEB27564 from Gut Microbiota in Parkinson’s Disease: Temporal Stability and Relations to Disease Progression. EBioMedicine. 2019;44:691-707
License: GPL-3.0
R
cd /ngs/16S-Demo/PRJEB27564
R
library("data.table")
library("phyloseq")
library("DESeq2")
library("ggplot2")
library("ggbeeswarm")
library("ggrepel")
library("vegan")
library("tidyverse")
Set the full-path to additional scripts
Note: Remember to change the PATH to the script accordingly
source("/path-to-script/taxa_summary.R", local = TRUE)
load("1_dada2_tutorial.RData")
Check phyloseq
object
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 7714 taxa and 126 samples ]
## sample_data() Sample Data: [ 126 samples by 7 sample variables ]
## tax_table() Taxonomy Table: [ 7714 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 7714 tips and 7712 internal nodes ]
ps
using subset_taxa
Here, we limit the taxa to those from Bacteria, Phylum and Class in not unassigned, and not belonging to the Mitochondria Family
subset_taxa(ps, Kingdom == "Bacteria" & !is.na(Phylum) & !is.na(Class) &
ps0 = Family != "Mitochondria")
data.table(setDT(as.data.frame(tax_table(ps0))),
tdt =TotalCounts = taxa_sums(ps0), SV = taxa_names(ps0))
tdt
## Kingdom Phylum Class Order Family Genus
## 1: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 2: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 3: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 4: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 5: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## ---
## 6698: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 6699: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 6700: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 6701: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 6702: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## Species TotalCounts SV
## 1: <NA> 187 SV2272
## 2: <NA> 76 SV3398
## 3: <NA> 13 SV5815
## 4: <NA> 4873 SV0306
## 5: <NA> 80 SV3349
## ---
## 6698: <NA> 4273 SV0343
## 6699: <NA> 28 SV4887
## 6700: <NA> 36 SV4481
## 6701: <NA> 19 SV5326
## 6702: <NA> 456881 SV0001
ggplot(tdt, aes(TotalCounts)) + geom_histogram(bins = 50) + theme_bw() +
ggtitle("Histogram of Total Counts")
== 1), .N] # singletons tdt[(TotalCounts
## [1] 4
== 2), .N] # doubletons tdt[(TotalCounts
## [1] 243
# taxa cumulative sum
tdt[, .N, by = TotalCounts]
taxcumsum =setkey(taxcumsum, TotalCounts)
:= cumsum(N)]
taxcumsum[, CumSum
# Plot the cumulative sum of ASVs against the total counts
ggplot(taxcumsum, aes(TotalCounts, CumSum)) + geom_point() + theme_bw() +
pCumSum = xlab("Filtering Threshold") + ylab("ASV Filtered")
png("images/FilterTaxa-taxa-abundance.png", width = 8, height = 8, units = "in", res = 300)
::grid.arrange(pCumSum, pCumSum + xlim(0, 500),
gridExtra+ xlim(0, 100), pCumSum + xlim(0, 50), nrow = 2,
pCumSum top = "ASVs that would be filtered vs. minimum taxa counts threshold")
invisible(dev.off())
fast_melt(ps0)
mdt =
mdt
## Kingdom Phylum Class Order Family Genus
## 1: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 2: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 3: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 4: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 5: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## ---
## 844448: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 844449: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 844450: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 844451: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 844452: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## Species TaxaID SampleID count RelativeAbundance
## 1: <NA> SV0001 C0005B 4573 0.08854167
## 2: <NA> SV0001 C0009B 910 0.01713231
## 3: <NA> SV0001 C0019B 2059 0.02681269
## 4: <NA> SV0001 C0020B 1630 0.03978424
## 5: <NA> SV0001 C0024B 25660 0.24607536
## ---
## 844448: <NA> SV7714 P0120F 0 0.00000000
## 844449: <NA> SV7714 P0045F 0 0.00000000
## 844450: <NA> SV7714 P0045B 0 0.00000000
## 844451: <NA> SV7714 P0083F 0 0.00000000
## 844452: <NA> SV7714 P0083B 0 0.00000000
mdt[, list(Prevalence = sum(count > 0), TotalCounts = sum(count),
prevdt =MaxCounts = max(count)), by = TaxaID]
prevdt
## TaxaID Prevalence TotalCounts MaxCounts
## 1: SV0001 125 456881 25660
## 2: SV0002 95 325950 20183
## 3: SV0003 109 310049 18095
## 4: SV0004 117 295543 17405
## 5: SV0005 111 283096 19687
## ---
## 6698: SV7706 1 2 2
## 6699: SV7709 1 1 1
## 6700: SV7710 1 1 1
## 6701: SV7713 1 1 1
## 6702: SV7714 1 1 1
ggplot(prevdt, aes(Prevalence)) + geom_histogram(bins = 50) + theme_bw() +
ggtitle("Histogram of Taxa Prevalence")
== 1), .N] # singletons prevdt[(Prevalence
## [1] 3077
== 2), .N] # doubletons prevdt[(Prevalence
## [1] 1215
ggplot(prevdt, aes(MaxCounts)) + geom_histogram(bins = 100) + xlim(0, 500) + theme_bw() +
ggtitle("Histogram of Maximum TotalCounts")
table(prevdt$MaxCounts)[1:50]
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
## 4 256 249 202 192 169 172 126 135 117 109 109 100 92 89 87 76 84 59 62 76 66
## 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
## 49 48 44 56 49 57 49 44 50 44 52 35 47 41 43 39 41 43 29 33 35 35
## 45 46 47 48 49 50
## 29 39 30 36 32 27
== 1)] # singletons prevdt[(MaxCounts
## TaxaID Prevalence TotalCounts MaxCounts
## 1: SV7709 1 1 1
## 2: SV7710 1 1 1
## 3: SV7713 1 1 1
## 4: SV7714 1 1 1
# taxa cumulative sum
prevdt[, .N, by = Prevalence]
prevcumsum =setkey(prevcumsum, Prevalence)
:= cumsum(N)]
prevcumsum[, CumSum
# Plot the cumulative sum of ASVs against the prevalence
ggplot(prevcumsum, aes(Prevalence, CumSum)) + geom_point(size = 2, alpha = 0.5) +
pPrevCumSum = theme_bw() + xlab("Filtering Threshold") + ylab("ASVs Filtered") +
ggtitle("ASVs that would be filtered vs. minimum sample count threshold")
png("images/FilterTaxa-taxa-prevalence.png", width = 8, height = 8, units = "in", res = 300)
pPrevCumSuminvisible(dev.off())
Prevalence vs. Total Count Scatter plot
png("images/FilterTaxa-Prevalence-TotalCounts.png", width = 8, height = 8, units = "in", res = 300)
ggplot(prevdt, aes(Prevalence, TotalCounts)) + geom_point(size = 2, alpha = 0.5) +
scale_y_log10() + theme_bw() + xlab("Prevalence [No. Samples]") + ylab("TotalCounts [Taxa]")
invisible(dev.off())
Colored by phylum
unique(copy(mdt[, list(TaxaID, Phylum)]))
addPhylum =
# Join by TaxaID
setkey(prevdt, TaxaID)
setkey(addPhylum, TaxaID)
addPhylum[prevdt]
prevdt =setkey(prevdt, Phylum)
png("images/FilterTaxa-Prevalence-TotalCounts-Phylum.png", width = 8, height = 8,
units = "in", res = 300)
ggplot(prevdt, aes(Prevalence, TotalCounts, color = Phylum)) +
geom_point(size = 1, alpha = 0.5) + scale_y_log10() + theme_bw() +
facet_wrap(~Phylum, nrow = 4) + theme(legend.position="none") +
xlab("Prevalence [No. Samples]") + ylab("Total Abundance")
invisible(dev.off())
Remember to review the images/FilterTaxa-*.png plots to select the best paramters
5
prevalenceThreshold = 10
abundanceThreshold = 5 maxThreshold =
prevdt[(Prevalence > prevalenceThreshold &
keepTaxa = TotalCounts > abundanceThreshold &
MaxCounts > maxThreshold), TaxaID]
prune_taxa(keepTaxa, ps0)
ps1 =
phy_tree(ps1) = ape::root(phy_tree(ps1), sample(taxa_names(ps1), 1), resolve.root = TRUE)
ps1
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 1450 taxa and 126 samples ]
## sample_data() Sample Data: [ 126 samples by 7 sample variables ]
## tax_table() Taxonomy Table: [ 1450 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 1450 tips and 1449 internal nodes ]
::uniquesToFasta(df[rownames(df) %in% taxa_names(ps1),], "outfiles/expr.asv.fasta",
dada2ids = df[rownames(df) %in% taxa_names(ps1),]$id)
::write_biom(biomformat::make_biom(data = t(as.matrix(otu_table(ps1)))),
biomformat"outfiles/expr.biom")
write.table(as.data.table(otu_table(ps1), keep.rownames = T), file = "outfiles/expr.otu_table.txt",
sep = "\t", quote = F, row.names = F, col.names = T)
write.table(as.data.table(tax_table(ps1), keep.rownames = T), file = "outfiles/expr.tax_table.txt",
sep = "\t", quote = F, row.names = F, col.names = T)
write.table(ps1 %>% psmelt() %>% arrange(OTU) %>% rename(ASV = OTU) %>%
select(ASV, Kingdom, Phylum, Class, Order, Family, Genus, Species, Sample_ID, Abundance) %>%
spread(Sample_ID, Abundance),
file = "outfiles/expr.abundance.all.txt", sep = "\t",
quote = F, row.names = F, col.names = T)
write.table(ps1 %>% tax_glom(taxrank = "Phylum") %>% psmelt() %>%
select(Phylum, Sample_ID, Abundance) %>% spread(Sample_ID, Abundance),
file = "outfiles/expr.abundance.abphy.txt", sep = "\t",
quote = F, row.names = F, col.names = T)
write.table(ps1 %>% tax_glom(taxrank = "Class") %>% psmelt() %>%
select(Class, Sample_ID, Abundance) %>% spread(Sample_ID, Abundance),
file = "outfiles/expr.abundance.abcls.txt", sep = "\t",
quote = F, row.names = F, col.names = T)
write.table(ps1 %>% tax_glom(taxrank = "Family") %>% psmelt() %>%
select(Family, Sample_ID, Abundance) %>% spread(Sample_ID, Abundance),
file = "outfiles/expr.abundance.abfam.txt", sep = "\t",
quote = F, row.names = F, col.names = T)
write.table(ps1 %>% tax_glom(taxrank = "Genus") %>% psmelt() %>%
select(Genus, Sample_ID, Abundance) %>% spread(Sample_ID, Abundance),
file = "outfiles/expr.abundance.abgen.txt", sep = "\t",
quote = F, row.names = F, col.names = T)
Use the transform_sample_counts
function to transforms the sample counts of a taxa abundance matrix according to the provided function, in the case the fractions of the whole sum
write.table(ps1 %>% transform_sample_counts(function(x) {x/sum(x)} ) %>% psmelt() %>%
arrange(OTU) %>% rename(ASV = OTU) %>%
select(ASV, Kingdom, Phylum, Class, Order, Family, Genus, Species, Sample_ID, Abundance) %>%
spread(Sample_ID, Abundance),
file = "outfiles/expr.relative_abundance.all.txt",
sep = "\t", quote = F, row.names = F, col.names = T)
write.table(ps1 %>% tax_glom(taxrank = "Phylum") %>%
transform_sample_counts(function(x) {x/sum(x)} ) %>% psmelt() %>%
select(Phylum, Sample_ID, Abundance) %>% spread(Sample_ID, Abundance),
file = "outfiles/expr.relative_abundance.abphy.txt",
sep = "\t", quote = F, row.names = F, col.names = T)
write.table(ps1 %>% tax_glom(taxrank = "Class") %>%
transform_sample_counts(function(x) {x/sum(x)} ) %>% psmelt() %>%
select(Class, Sample_ID, Abundance) %>% spread(Sample_ID, Abundance),
file = "outfiles/expr.relative_abundance.abcls.txt",
sep = "\t", quote = F, row.names = F, col.names = T)
write.table(ps1 %>% tax_glom(taxrank = "Family") %>%
transform_sample_counts(function(x) {x/sum(x)} ) %>% psmelt() %>%
select(Family, Sample_ID, Abundance) %>% spread(Sample_ID, Abundance),
file = "outfiles/expr.relative_abundance.abfam.txt",
sep = "\t", quote = F, row.names = F, col.names = T)
write.table(ps1 %>% tax_glom(taxrank = "Genus") %>%
transform_sample_counts(function(x) {x/sum(x)} ) %>% psmelt() %>%
select(Genus, Sample_ID, Abundance) %>% spread(Sample_ID, Abundance),
file = "outfiles/expr.relative_abundance.abgen.txt",
sep = "\t", quote = F, row.names = F, col.names = T)
# Transform to proportions (relative abundances)
transform_sample_counts(ps1, function(OTU) OTU/sum(OTU))
ps1.rp =
# Top N taxa
200
N = names(sort(taxa_sums(ps1), decreasing=TRUE))[1:N]
topN = transform_sample_counts(ps1, function(OTU) OTU/sum(OTU))
ps1.topN = prune_taxa(topN, ps1.topN)
ps1.topN =
ps1.topN
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 200 taxa and 126 samples ]
## sample_data() Sample Data: [ 126 samples by 7 sample variables ]
## tax_table() Taxonomy Table: [ 200 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 200 tips and 199 internal nodes ]
# All taxa
plot_bar(ps1.rp, x = "Sample_ID", fill = "Phylum",
ptaxa1 =title = paste(ntaxa(ps1.rp), "Taxa (Phylum)")) +
geom_bar(stat = "identity", size = 0.1, color = "black") +
facet_wrap(~Group2, scales = "free_x", nrow = 1) + guides(fill = guide_legend(ncol = 1)) +
scale_y_continuous(breaks = seq(0, 1, 0.1)) +
theme(legend.title = element_text(size = 8), legend.text = element_text(size = 7),
axis.text.x = element_text(size = 5, angle = 90, vjust = 0.5, hjust = 1),
axis.text.y = element_text(size = 12)) + xlab("Sample") + ylab("Relative abundance")
# Top 200 taxa
plot_bar(ps1.topN, x = "Sample_ID", fill = "Phylum",
ptaxa2 =title = paste("Top",N, "Taxa (Phylum)")) +
geom_bar(stat = "identity", size = 0.1, color = "black") +
facet_wrap(~Group2, scales = "free_x", nrow = 1) + guides(fill = guide_legend(ncol = 1)) +
scale_y_continuous(breaks = seq(0, 1, 0.1)) +
theme(legend.title = element_text(size = 8), legend.text = element_text(size = 7),
axis.text.x = element_text(size = 5, angle = 90, vjust = 0.5, hjust = 1),
axis.text.y = element_text(size = 12)) + xlab("Sample") + ylab("Relative Abundance")
png("images/plot_bar_phylum.png", width = 10, height = 8, units = "in", res = 300)
::grid.arrange(ptaxa1, ptaxa2, nrow = 2)
gridExtrainvisible(dev.off())
plot_bar(ps1.rp, x = "Sample_ID", fill = "Class",
ptaxa3 =title = paste(ntaxa(ps1.rp), "Taxa (Class)")) +
geom_bar(stat = "identity", size = 0.1, color = "black") +
facet_wrap(~Group2, scales = "free_x", nrow = 1) + guides(fill = guide_legend(ncol = 1)) +
scale_y_continuous(breaks = seq(0, 1, 0.1)) +
theme(legend.title = element_text(size = 8), legend.text = element_text(size = 7),
axis.text.x = element_text(size = 5, angle = 90, vjust = 0.5, hjust = 1),
axis.text.y = element_text(size = 12)) + xlab("Sample") + ylab("Relative abundance")
plot_bar(ps1.topN, x = "Sample_ID", fill = "Class",
ptaxa4 =title = paste("Top",N, "Taxa (Class)")) +
geom_bar(stat = "identity", size = 0.1, color = "black") +
facet_wrap(~Group2, scales = "free_x", nrow = 1) + guides(fill = guide_legend(ncol = 1)) +
scale_y_continuous(breaks = seq(0, 1, 0.1)) +
theme(legend.title = element_text(size = 8), legend.text = element_text(size = 7),
axis.text.x = element_text(size = 5, angle = 90, vjust = 0.5, hjust = 1),
axis.text.y = element_text(size = 12)) + xlab("Sample") + ylab("Relative Abundance")
png("images/plot_bar_class.png", width = 10, height = 8, units = "in", res = 300)
::grid.arrange(ptaxa3, ptaxa4, nrow = 2)
gridExtrainvisible(dev.off())
plot_bar(ps1.topN, x = "Sample_ID", fill = "Family",
ptaxa5 =title = paste("Top",N, "Taxa (Family)")) +
geom_bar(stat = "identity", size = 0.1, color = "black") +
facet_wrap(~Group2, scales = "free_x", nrow = 1) + guides(fill = guide_legend(ncol = 1)) +
scale_y_continuous(breaks = seq(0, 1, 0.1)) +
theme(legend.title = element_text(size = 8), legend.text = element_text(size = 7),
axis.text.x = element_text(size = 5, angle = 90, vjust = 0.5, hjust = 1),
axis.text.y = element_text(size = 12)) + xlab("Sample") + ylab("Relative Abundance")
png("images/plot_bar_family.png", width = 10, height = 8, units = "in", res = 300)
ptaxa5invisible(dev.off())
plot_bar(ps1.topN, x = "Sample_ID", fill = "Genus",
ptaxa6 =title = paste("Top",N, "Taxa (Genus)")) +
geom_bar(stat = "identity", size = 0.1, color = "black") +
facet_wrap(~Group2, scales = "free_x", nrow = 1) + guides(fill = guide_legend(ncol = 2)) +
scale_y_continuous(breaks = seq(0, 1, 0.1)) +
theme(legend.title = element_text(size = 8), legend.text = element_text(size = 7),
axis.text.x = element_text(size = 5, angle = 90, vjust = 0.5, hjust = 1),
axis.text.y = element_text(size = 12)) + xlab("Sample") + ylab("Relative Abundance")
png("images/plot_bar_genus.png", width = 10, height = 8, units = "in", res = 300)
ptaxa6invisible(dev.off())
Explore alpha diversity using the plot_richness
function
# Select alpha-diversity measures
c("Observed", "Chao1", "Shannon", "Simpson")
divIdx =
png("images/plot_richness.png", width = 6, height = 5, units = "in", res = 300)
plot_richness(ps1, x = "Group2", measures = divIdx, color = "Group2", nrow = 1) +
geom_point(size = 0.8) + theme_bw() +
theme(legend.position = "none",
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
labs(x = "Group", y = "Alpha Diversity Measure")
invisible(dev.off())
Create a long-format data.frame and plot with ggplot
function
estimate_richness(ps1, measures = divIdx)
ad = merge(data.frame(sample_data(ps1)), ad, by = "row.names")
ad = ad %>% select(Sample_ID, Group2, all_of(divIdx)) %>%
ad = gather(key = "alpha", value = "measure", -c(Sample_ID, Group2))
png("images/plot_richness_boxplot.png", width = 6, height = 5, units = "in", res = 300)
ggplot(ad, aes(Group2, measure, color = Group2)) +
geom_boxplot(outlier.shape = NA, size = 0.8, width = 0.8) +
geom_quasirandom(size = 0.8, color = "black") + theme_bw() +
facet_wrap(~ alpha, scales = "free_y", nrow = 1) +
theme(legend.position = "none",
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
labs(x = "Group", y = "Alpha Diversity Measure")
invisible(dev.off())
Perform various flavors of unifrac measurements using the GUniFrac
package
GUniFrac::GUniFrac(otu_table(ps1), phy_tree(ps1), alpha = c(0, 0.5, 1))$unifracs unifracs =
Perform ordination
# Calculate geometric means prior to estimate size factors
function(x, na.rm = TRUE){
gm_mean =exp(sum(log(x[x > 0]), na.rm = na.rm) / length(x))
}
# Perform variance stabilizing transformation
ps1
ps1.vsd = phyloseq_to_deseq2(ps1, ~ Group2) dds =
## converting counts to integer mode
apply(counts(dds), 1, gm_mean)
geoMeans = estimateSizeFactors(dds, geoMeans = geoMeans)
dds = estimateDispersions(dds) dds =
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
getVarianceStabilizedData(dds)
abund.vsd =< 0] = 0 # set negative values to 0
abund.vsd[abund.vsd otu_table(ps1.vsd) = otu_table(abund.vsd, taxa_are_rows = TRUE)
# Create distance objects
as.dist(unifracs[, , "d_UW"]) # Unweighted UniFrac
dist_un =attr(dist_un, "method") = "Unweighted UniFrac"
as.dist(unifracs[, , "d_1"]) # Weighted UniFrac
dist_wu =attr(dist_wu, "method") = "Weighted UniFrac"
as.dist(unifracs[, , "d_VAW"]) # Variance-adjusted-weighted UniFrac
dist_vu =attr(dist_vu, "method") = "Variance-adjusted-weighted UniFrac"
as.dist(unifracs[, , "d_0.5"]) # GUniFrac with alpha 0.5
dist_gu =attr(dist_gu, "method") = "GUniFrac with alpha 0.5"
phyloseq::distance(ps1.vsd, "bray") # Bray-Curtis
dist_bc =
# Perform ordination
ordinate(ps1, method = "PCoA", distance = dist_un)
ord_un = ordinate(ps1, method = "PCoA", distance = dist_wu)
ord_wu = ordinate(ps1, method = "PCoA", distance = dist_vu)
ord_vu = ordinate(ps1, method = "PCoA", distance = dist_gu)
ord_gu =set.seed(12345)
ordinate(ps1, method = "NMDS", distance = dist_bc) ord_bc =
## Run 0 stress 0.164095
## Run 1 stress 0.164095
## ... Procrustes: rmse 0.0003787895 max resid 0.003675405
## ... Similar to previous best
## Run 2 stress 0.4123708
## Run 3 stress 0.1640947
## ... New best solution
## ... Procrustes: rmse 0.0002492135 max resid 0.002473993
## ... Similar to previous best
## Run 4 stress 0.1640948
## ... Procrustes: rmse 0.0001457638 max resid 0.001053662
## ... Similar to previous best
## Run 5 stress 0.1657887
## Run 6 stress 0.1640946
## ... New best solution
## ... Procrustes: rmse 0.00008415684 max resid 0.0008498087
## ... Similar to previous best
## Run 7 stress 0.164095
## ... Procrustes: rmse 0.0002501875 max resid 0.002166513
## ... Similar to previous best
## Run 8 stress 0.1640946
## ... New best solution
## ... Procrustes: rmse 0.0001519537 max resid 0.001130737
## ... Similar to previous best
## Run 9 stress 0.1657897
## Run 10 stress 0.1640946
## ... Procrustes: rmse 0.0001009962 max resid 0.0009432627
## ... Similar to previous best
## Run 11 stress 0.165789
## Run 12 stress 0.1640952
## ... Procrustes: rmse 0.0003352119 max resid 0.002984929
## ... Similar to previous best
## Run 13 stress 0.1640948
## ... Procrustes: rmse 0.0002358344 max resid 0.001624603
## ... Similar to previous best
## Run 14 stress 0.1640947
## ... Procrustes: rmse 0.00006278543 max resid 0.0003796508
## ... Similar to previous best
## Run 15 stress 0.1907991
## Run 16 stress 0.1657896
## Run 17 stress 0.1657889
## Run 18 stress 0.1657893
## Run 19 stress 0.1640951
## ... Procrustes: rmse 0.0003231099 max resid 0.002735841
## ... Similar to previous best
## Run 20 stress 0.1657883
## *** Solution reached
Output to PDF
pdf("images/plot_ordination.pdf", width = 6, height = 5, pointsize = 12)
plot_ordination(ps1, ord_un, type = "samples", color = "Group2",
title = "PCoA on unweighted UniFrac") +
theme_bw() + coord_fixed(ratio = 1) +
stat_ellipse(aes(group = Group2), type = "t", size = 0.5, linetype = "dashed")
plot_ordination(ps1, ord_wu, type = "samples", color = "Group2",
title = "PCoA on weighted UniFrac") +
theme_bw() + coord_fixed(ratio = 1) +
stat_ellipse(aes(group = Group2), type = "t", size = 0.5, linetype = "dashed")
plot_ordination(ps1, ord_vu, type = "samples", color = "Group2",
title = "PCoA on Variance adjusted weighted UniFrac") +
theme_bw() + coord_fixed(ratio = 1) +
stat_ellipse(aes(group = Group2), type = "t", size = 0.5, linetype = "dashed")
plot_ordination(ps1, ord_gu, type = "samples", color = "Group2",
title = "PCoA on GUniFrac with alpha 0.5") +
theme_bw() + coord_fixed(ratio = 1) +
stat_ellipse(aes(group = Group2), type = "t", size = 0.5, linetype = "dashed")
plot_ordination(ps1, ord_bc, type = "samples", color = "Group2",
title = "NMDS on Bray-Curtis distance (VST)") +
theme_bw() + coord_fixed(ratio = 1) +
stat_ellipse(aes(group = Group2), type = "t", size = 0.5, linetype = "dashed")
invisible(dev.off())
Example: PCoA on weighted UniFrac
Permutational multivariate analysis of variance (PERMANOVA) test for differences between independent groups using the adonis
function
set.seed(12345)
adonis(dist_un ~ Group2, data.frame(sample_data(ps1)), permutations = 1000)
##
## Call:
## adonis(formula = dist_un ~ Group2, data = data.frame(sample_data(ps1)), permutations = 1000)
##
## Permutation: free
## Number of permutations: 1000
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Group2 3 0.3596 0.11988 1.1438 0.02736 0.1499
## Residuals 122 12.7869 0.10481 0.97264
## Total 125 13.1465 1.00000
set.seed(12345)
adonis(dist_wu ~ Group2, data.frame(sample_data(ps1)), permutations = 1000)
##
## Call:
## adonis(formula = dist_wu ~ Group2, data = data.frame(sample_data(ps1)), permutations = 1000)
##
## Permutation: free
## Number of permutations: 1000
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Group2 3 0.1706 0.056865 1.2852 0.03063 0.1459
## Residuals 122 5.3982 0.044248 0.96937
## Total 125 5.5688 1.00000
set.seed(12345)
adonis(dist_vu ~ Group2, data.frame(sample_data(ps1)), permutations = 1000)
##
## Call:
## adonis(formula = dist_vu ~ Group2, data = data.frame(sample_data(ps1)), permutations = 1000)
##
## Permutation: free
## Number of permutations: 1000
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Group2 3 -0.0181 -0.0060325 -0.46755 -0.01163 0.9071
## Residuals 122 1.5741 0.0129022 1.01163
## Total 125 1.5560 1.00000
set.seed(12345)
adonis(dist_gu ~ Group2, data.frame(sample_data(ps1)), permutations = 1000)
##
## Call:
## adonis(formula = dist_gu ~ Group2, data = data.frame(sample_data(ps1)), permutations = 1000)
##
## Permutation: free
## Number of permutations: 1000
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Group2 3 0.3953 0.13176 1.1215 0.02684 0.2068
## Residuals 122 14.3330 0.11748 0.97316
## Total 125 14.7283 1.00000
set.seed(12345)
adonis(dist_bc ~ Group2, data.frame(sample_data(ps1)), permutations = 1000)
##
## Call:
## adonis(formula = dist_bc ~ Group2, data = data.frame(sample_data(ps1)), permutations = 1000)
##
## Permutation: free
## Number of permutations: 1000
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Group2 3 0.6691 0.22304 1.1532 0.02758 0.1239
## Residuals 122 23.5960 0.19341 0.97242
## Total 125 24.2652 1.00000
Test for homogeneity condition among groups using the betadisper
function
betadisper(dist_un, group = data.frame(sample_data(ps1))$Group2)
disp1 =set.seed(12345)
permutest(disp1, permutations = 1000)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 1000
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 3 0.001941 0.00064713 0.3975 1000 0.7493
## Residuals 122 0.198631 0.00162812
betadisper(dist_wu, group = data.frame(sample_data(ps1))$Group2)
disp2 =set.seed(12345)
permutest(disp2, permutations = 1000)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 1000
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 3 0.01620 0.0053986 1.7576 1000 0.1548
## Residuals 122 0.37473 0.0030715
betadisper(dist_vu, group = data.frame(sample_data(ps1))$Group2)
disp3 =set.seed(12345)
permutest(disp3, permutations = 1000)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 1000
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 3 0.000999 0.00033284 0.3408 1000 0.8232
## Residuals 122 0.119158 0.00097671
betadisper(dist_gu, group = data.frame(sample_data(ps1))$Group2)
disp4 =set.seed(12345)
permutest(disp4, permutations = 1000)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 1000
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 3 0.006557 0.0021856 0.8989 1000 0.4555
## Residuals 122 0.296638 0.0024315
betadisper(dist_bc, group = data.frame(sample_data(ps1))$Group2)
disp5 =set.seed(12345)
permutest(disp5, permutations = 1000)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 1000
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 3 0.01731 0.0057686 1.6737 1000 0.1898
## Residuals 122 0.42049 0.0034467
Example: Dispersion of unweighted UniFrac
plot(disp3, hull = FALSE, ellipse = TRUE)
save.image(file = "2_phyloseq_tutorial.RData")
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-conda-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.5 LTS
##
## Matrix products: default
## BLAS/LAPACK: /home/ihsuan/miniconda3_4.10.0/envs/r4-16S/lib/libopenblasp-r0.3.15.so
##
## locale:
## [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C LC_TIME=en_GB.UTF-8
## [4] LC_COLLATE=en_GB.UTF-8 LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8
## [7] LC_PAPER=en_GB.UTF-8 LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets methods
## [9] base
##
## other attached packages:
## [1] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.7
## [4] purrr_0.3.4 readr_1.4.0 tidyr_1.1.3
## [7] tibble_3.1.2 tidyverse_1.3.1 vegan_2.5-7
## [10] lattice_0.20-44 permute_0.9-5 ggrepel_0.9.1
## [13] ggbeeswarm_0.6.0 ggplot2_3.3.4 DESeq2_1.32.0
## [16] SummarizedExperiment_1.22.0 Biobase_2.52.0 MatrixGenerics_1.4.0
## [19] matrixStats_0.59.0 GenomicRanges_1.44.0 GenomeInfoDb_1.28.0
## [22] IRanges_2.26.0 S4Vectors_0.30.0 BiocGenerics_0.38.0
## [25] phyloseq_1.36.0 data.table_1.14.0 knitr_1.33
##
## loaded via a namespace (and not attached):
## [1] colorspace_2.0-2 hwriter_1.3.2 ellipsis_0.3.2
## [4] XVector_0.32.0 fs_1.5.0 rstudioapi_0.13
## [7] farver_2.1.0 bit64_4.0.5 AnnotationDbi_1.54.0
## [10] fansi_0.4.2 lubridate_1.7.10 xml2_1.3.2
## [13] codetools_0.2-18 splines_4.1.0 cachem_1.0.5
## [16] dada2_1.20.0 geneplotter_1.70.0 ade4_1.7-17
## [19] jsonlite_1.7.2 Rsamtools_2.8.0 broom_0.7.8
## [22] annotate_1.70.0 cluster_2.1.2 dbplyr_2.1.1
## [25] png_0.1-7 compiler_4.1.0 httr_1.4.2
## [28] backports_1.2.1 assertthat_0.2.1 Matrix_1.3-4
## [31] fastmap_1.1.0 cli_2.5.0 htmltools_0.5.1.1
## [34] tools_4.1.0 igraph_1.2.6 gtable_0.3.0
## [37] glue_1.4.2 GenomeInfoDbData_1.2.6 reshape2_1.4.4
## [40] ShortRead_1.50.0 Rcpp_1.0.6 cellranger_1.1.0
## [43] vctrs_0.3.8 Biostrings_2.60.0 rhdf5filters_1.4.0
## [46] multtest_2.48.0 ape_5.5 nlme_3.1-152
## [49] iterators_1.0.13 xfun_0.24 ps_1.6.0
## [52] rvest_1.0.0 lifecycle_1.0.0 XML_3.99-0.6
## [55] zlibbioc_1.38.0 MASS_7.3-54 scales_1.1.1
## [58] hms_1.1.0 biomformat_1.20.0 rhdf5_2.36.0
## [61] RColorBrewer_1.1-2 yaml_2.2.1 gridExtra_2.3
## [64] memoise_2.0.0 latticeExtra_0.6-29 stringi_1.6.2
## [67] RSQLite_2.2.5 highr_0.9 genefilter_1.74.0
## [70] foreach_1.5.1 BiocParallel_1.26.0 rlang_0.4.11
## [73] pkgconfig_2.0.3 bitops_1.0-7 evaluate_0.14
## [76] Rhdf5lib_1.14.0 labeling_0.4.2 GenomicAlignments_1.28.0
## [79] bit_4.0.4 tidyselect_1.1.1 plyr_1.8.6
## [82] magrittr_2.0.1 R6_2.5.0 generics_0.1.0
## [85] DelayedArray_0.18.0 DBI_1.1.1 pillar_1.6.1
## [88] haven_2.4.1 withr_2.4.2 mgcv_1.8-36
## [91] survival_3.2-11 KEGGREST_1.32.0 RCurl_1.98-1.3
## [94] modelr_0.1.8 crayon_1.4.1 utf8_1.2.1
## [97] rmarkdown_2.9 jpeg_0.1-8.1 locfit_1.5-9.4
## [100] grid_4.1.0 readxl_1.3.1 blob_1.2.1
## [103] reprex_2.0.0 digest_0.6.27 xtable_1.8-4
## [106] RcppParallel_5.1.4 munsell_0.5.0 beeswarm_0.4.0
## [109] vipor_0.4.5