Load Packages
library("phyloseq")
library("ggplot2")
library(plyr)
library("dplyr")
## Warning: package 'dplyr' was built under R version 3.5.1
library("tidyr")
library("RColorBrewer", lib.loc="/Library/Frameworks/R.framework/Versions/3.4/Resources/library")
library(gridExtra)
library(reshape2)
cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
library("vegan")
load data (exported from qiime2 taxa-bar-plots.qzv
)
seqper<- read.csv("level-3.csv")
Filter to only contain Acanatharea samples and then remove taxa that were only present in the environmental samples.
seqper <- seqper[seqper$Organism == "Acantharea",]
row.names(seqper) <- seqper$meta
seqperRA <-seqper[,-c(1,30:35)]
seqperRA <- seqperRA[,colSums(seqperRA) > 0]
Transform data to relative abundance for plotting.
tseqperRA <- data.frame(t(seqperRA))
sumcols<- colSums(tseqperRA)
for(i in 1:ncol(tseqperRA)) {
tseqperRA[[i]]<- (tseqperRA[[i]]/sumcols[[i]])*100
}
tseqperRA<-round(tseqperRA, digits = 2)
#write.csv(tseqperRA, file="initrelabund.csv", quote = FALSE, row.names= TRUE )
prepare data for plotting
tseqperRA$row <-row.names(tseqperRA)
mSingles<- melt(tseqperRA, id.vars="row")
plot:
S2 <- brewer.pal(8, "Set2")
colors1 <- c("#50a33e", "#7eb38e", "#b1f41b", "#0e8121", "#5edbff", "#0097c3", "#0a3863", "#56B4E9", "#b2f5ff", "#0e5c63", "#1b8085", "#3aa6a9", "#ffb3b3", "#ff7b7b")
colors<- c( S2, colors1)
# put samples in more logical order on x axis and change label names
xorder = c("st2.A.1","st2.A.3","st2.A.5","st2.A.6","st2.A.7","st4.A.9","st10.A.10","st12.A.11","st12.A.12","st12.A.14","st12.A.15","st12.A.16","st13.A.18","st13.A.19","st13.A.20","st13.A.21","st17.A.22","st17.A.25","st17.A.27","st17.A.28", "st17.A.29","st17.A.31","st17.A.33","st17.A.34","st17.A.35","st17.A.36","st17.A.39","st17.A.40","st17.A.42","st17.A.43", "Onna.April.A3", "Onna.April.A4","Onna.May.A1","Onna.May.A3","Onna.May.A6","Onna.May.A7","Onna.May.A10","Onna.May.A11","Onna.May.A12", "Catalina.A.Ae","Catalina.A.C1","Catalina.A.C2" )
xnames = c("st2.1","st2.3","st2.5","st2.6","st2.7","st4.9","st10.10","st12.11","st12.12","st12.14","st12.15","st12.16","st13.18","st13.19","st13.20","st13.21","st17.22","st17.25","st17.27","st17.28", "st17.29","st17.31","st17.33","st17.34","st17.35","st17.36","st17.39","st17.40","st17.42","st17.43", "Oki.3A", "Oki.4A","Oki.1","Oki.3","Oki.6","Oki.7","Oki.10","Oki.11","Oki.12", "Cat.Ae","Cat.C1","Cat.C2")
relabund<- ggplot(mSingles, aes(x=variable, y=value, fill=row)) +
geom_bar(stat="identity") +
xlab("") +
ylab("") +
theme_bw()+
scale_x_discrete(limits = xorder, labels = xnames)+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
theme(legend.title=element_blank()) +
theme(legend.position="bottom") +
guides(fill=guide_legend(ncol=2))+
scale_y_continuous(expand = c(0, 0)) + scale_fill_manual(values=colors)
relabund + theme(legend.position="none") +
theme(text=element_text(size=16, family="serif"))
Load data and prepare dataframes to be imported as phyloseq objects.
SV Feature Table
singles <- read.delim("singles-feature-table-renamed.txt")
row.names(singles)<-singles[[1]]
singles<-singles[,-(1)]
singles<- singles[ , order(names(singles))]
#convert feature table to OTU phyloseq object
fmat <- as.matrix(singles)
OTU = otu_table(fmat, taxa_are_rows = TRUE)
Taxonomy Table
taxonomy <- read.csv("taxonomy.csv", stringsAsFactors = FALSE)
names(taxonomy) <- c("row", "tax", "Confidence")
row.names(taxonomy) <-taxonomy[[1]]
taxonomy <- taxonomy[,(-1)]
taxonomy <- separate(taxonomy, tax, c("D0","D1", "D2", "D3", "D4", "D5", "D6", "D7", "D8", "D9", "D10", "D11", "D12", "D13", "D14"), sep = ";", fill = "right")
taxonomy <- taxonomy[,c(1:5)]
taxmat <- as.matrix(taxonomy)
TAX = tax_table(taxmat)
Sample Information
metatable <- read.csv("sampledata.csv")
row.names(metatable) <- metatable[[1]]
metatable$HostSV <- as.character(metatable$HostSV)
metatable<- metatable[,(-1)]
META<- sample_data(metatable)
Phylogenetic Tree
tree<- read_tree("tree.nwk")
Make Phyloseq Object
ps<- phyloseq(OTU, TAX, META, tree)
prevdf = apply(X = otu_table(ps),
MARGIN = ifelse(taxa_are_rows(ps), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
prevdf = data.frame(Prevalence = prevdf,
TotalAbundance = taxa_sums(ps),
tax_table(ps))
plyr::ddply(prevdf, "D2", function(df1){cbind(mean(df1$Prevalence),sum(df1$Prevalence))})
## D2 1 2
## 1 Ambiguous_taxa 0.02564103 1
## 2 D_2__Alveolata 0.01602925 57
## 3 D_2__Ancyromonadida 4.00000000 8
## 4 D_2__Apusomonadidae 1.87500000 15
## 5 D_2__Chloroplastida 0.26582278 21
## 6 D_2__Cryptomonadales 0.22222222 2
## 7 D_2__Discoba 0.00000000 0
## 8 D_2__Discosea 1.00000000 2
## 9 D_2__Holozoa 0.21904762 46
## 10 D_2__Kathablepharidae 0.00000000 0
## 11 D_2__Nucletmycea 1.00000000 3
## 12 D_2__P1-31 0.00000000 0
## 13 D_2__Palpitomonas 0.06896552 2
## 14 D_2__Pav3 0.00000000 0
## 15 D_2__Pavlovophyceae 0.00000000 0
## 16 D_2__Prymnesiophyceae 1.15730337 412
## 17 D_2__Rhizaria 0.84988453 368
## 18 D_2__Rhodophyceae 1.00000000 1
## 19 D_2__Stramenopiles 0.11607143 26
## 20 D_2__Telonema 0.08064516 5
## 21 D_2__uncultured eukaryote 0.00000000 0
## 22 <NA> 0.10391198 85
prevplot1<-ggplot(prevdf, aes(TotalAbundance, Prevalence / nsamples(ps),color=D2)) +
geom_hline(yintercept = 0.05, alpha = 0.5, linetype = 2) + geom_point(size = 2, alpha = 0.7) +
theme_bw()+
scale_x_log10() + xlab("Total Abundance") + ylab("Prevalence [Frac. Samples]") +
facet_wrap(~D2) + theme(legend.position="none")
Initial Prevalence Plot
## Warning: Transformation introduced infinite values in continuous x-axis
Filter at 5% prevalence (SVs below the dashed lines in the plots will be discarded) )
prevalenceThreshold = 0.05 * nsamples(ps)
keepTaxa = rownames(prevdf)[(prevdf$Prevalence >= prevalenceThreshold)]
ps2 = prune_taxa(keepTaxa, ps)
table(tax_table(ps2)[, "D2"], exclude = NULL)
##
## D_2__Ancyromonadida D_2__Apusomonadidae D_2__Chloroplastida
## 1 2 1
## D_2__Holozoa D_2__Prymnesiophyceae D_2__Rhizaria
## 3 21 26
## D_2__Stramenopiles <NA>
## 2 5
Check out the NA samples
D2s <- c("D_2__Ancyromonadida", "D_2__Apusomonadidae", "D_2__Chloroplastida",
"D_2__Holozoa", "D_2__Prymnesiophyceae", "D_2__Rhizaria", "D_2__Stramenopiles")
physeqNA <- subset_taxa(ps2, !(D2 %in% D2s))
physeqNA
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 5 taxa and 42 samples ]
## sample_data() Sample Data: [ 42 samples by 3 sample variables ]
## tax_table() Taxonomy Table: [ 5 taxa by 5 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 5 tips and 4 internal nodes ]
naOTU <- data.frame(otu_table(physeqNA))
blasting the 5 NA sequences revealed that they are most likely host-derived. The top hits for each were Acantharea.
Remove Host SVs (SAR) and remaining prey (Opisthokonta) by filtering to keep only Haptophyte SVs.
physeqP <- subset_taxa(ps2, D1=="D_1__Haptophyta")
physeqP
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 21 taxa and 42 samples ]
## sample_data() Sample Data: [ 42 samples by 3 sample variables ]
## tax_table() Taxonomy Table: [ 21 taxa by 5 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 21 tips and 20 internal nodes ]
21 symbiotic SVs remain
Transform count data to relative abundance (as %) to normalize for differences in library size.
physeqPra<- transform_sample_counts(physeqP, function(OTU) 100* OTU/sum(OTU))
Determine the Bray-Curtis distances between samples and perform Principal Coordinate Analysis (PCoA). Plot the PCoA.
ordu = ordinate(physeqPra, "PCoA", "bray")
p<-plot_ordination(physeqPra, ordu, color="Location", shape = "HostSV")+theme_bw() +scale_color_manual(values=cbPalette)+ geom_point(size=3)+
theme(text=element_text(size=16, family="serif"))
PERMANOVA with Vegan function adonis
by collection location:
set.seed(1)
OTUs <- t(data.frame(otu_table(physeqPra))) #get data frame of symbiont SVs from phyloseq object object
meta <- metatable[metatable$Source=="host",] # filter sample data to include ONLY the samples included in this analysis. Otherwise, adonis will give an error.
adonis(vegdist(OTUs, method = "bray") ~ Location, data = meta)
##
## Call:
## adonis(formula = vegdist(OTUs, method = "bray") ~ Location, data = meta)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 7 3.1581 0.45116 4.2343 0.46574 0.001 ***
## Residuals 34 3.6227 0.10655 0.53426
## Total 41 6.7808 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
and by Host SV type:
set.seed(1)
adonis(vegdist(OTUs, method = "bray") ~ HostSV, data = meta)
##
## Call:
## adonis(formula = vegdist(OTUs, method = "bray") ~ HostSV, data = meta)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## HostSV 4 1.3050 0.32625 2.2045 0.19246 0.009 **
## Residuals 37 5.4758 0.14800 0.80754
## Total 41 6.7808 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Export filtered SV table to perforem pairwise comparisons in Q2.
OTUs4Q2 <- data.frame(otu_table(physeqPra))
OTUs4Q2 <- cbind(rownames(OTUs4Q2), data.frame(OTUs4Q2, row.names=NULL))
colnames(OTUs4Q2)[1] <- "OTU ID"
write.table(OTUs4Q2, file='feature-table-2.txt', quote=FALSE, sep='\t', row.names = FALSE)
Export sample data for Q2 because sample “-” in sample names have changed to “.”
meta4Q2 <- cbind(rownames(meta), data.frame(meta, row.names=NULL))
colnames(meta4Q2)[1] <- "SampleID"
write.table(meta4Q2, file='metatable.tsv', quote=FALSE, sep='\t', row.names = FALSE)
symbioOTUs <- data.frame(otu_table(physeqPra)) #get data frame of symbiotic OTUs from phyloseq object
symbioOTUs$row <- row.names(symbioOTUs)
#make data frame of Symbiont OTUs with original counts
singles <- read.delim("singles-feature-table-renamed.txt")
singlesyms <- data.frame(symbioOTUs[,43])
names(singlesyms) <- c("row")
syms <- merge(singlesyms, singles, by="row")
#make dataframe for merging with environmental samples dataframe later
syms4filters <-syms
row.names(syms4filters) <-syms4filters$row
syms4filters<-syms4filters[,(-1)]
syms4filters<- syms4filters[ , order(names(syms4filters))]
MiraiAs <- syms4filters[,13:42]
MiraiAs$row <- rownames(MiraiAs)
#convert to relative abundance
row.names(syms) <-syms$row
singlesRA <- syms[,(-1)]
sumcols <- colSums(singlesRA)
for(i in 1:ncol(singlesRA)) {
singlesRA[[i]]<- (singlesRA[[i]]/sumcols[[i]])*100
}
syms<- singlesRA
#assign SVs ids based on their location on the phylogenetic tree. This will ensure that the SVs appear in the same order in the stacked bar plots as they do in the phylogenetic tree and will make it easier to look back and forth between the two plots
id <- c("S", "J", "K", "O", "N", "F", "M", "P", "R","A","D","B","H","E","I","G","T","C","U","L","Q")
syms$id2 <- id
#melt data frame for plotting
msyms<- melt(syms, id.vars="id2")
# put samples in more logical order on x axis and change label names
xorder = c("st2.A.1","st2.A.3","st2.A.5","st2.A.6","st2.A.7","st4.A.9","st10.A.10","st12.A.11","st12.A.12","st12.A.14","st12.A.15","st12.A.16","st13.A.18","st13.A.19","st13.A.20","st13.A.21","st17.A.22","st17.A.25","st17.A.27","st17.A.28", "st17.A.29","st17.A.31","st17.A.33","st17.A.34","st17.A.35","st17.A.36","st17.A.39","st17.A.40","st17.A.42","st17.A.43", "Onna.April.A3", "Onna.April.A4","Onna.May.A1","Onna.May.A3","Onna.May.A6","Onna.May.A7","Onna.May.A10","Onna.May.A11","Onna.May.A12", "Catalina.A.Ae","Catalina.A.C1","Catalina.A.C2" )
xnames = c("st2.1","st2.3","st2.5","st2.6","st2.7","st4.9","st10.10","st12.11","st12.12","st12.14","st12.15","st12.16","st13.18","st13.19","st13.20","st13.21","st17.22","st17.25","st17.27","st17.28", "st17.29","st17.31","st17.33","st17.34","st17.35","st17.36","st17.39","st17.40","st17.42","st17.43", "Oki.3A", "Oki.4A","Oki.1","Oki.3","Oki.6","Oki.7","Oki.10","Oki.11","Oki.12", "Cat.Ae","Cat.C1","Cat.C2")
barorder = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21")
#custom color scheme
colors <- c("#cd4a4a", "#9f1c3b", "#790a1a", "#878787", "#5e5e5e", "#292929", "#95cde8", "#4897d8", "#217ca3", "#bbfaff", "#3aa6a9", "#1b8085","#fff671", "#ffd662", "#f5dfb8", "#f5ba98", "#f77d75", "#bedb92", "#77c063", "#569358", "#065900")
#plot relative abundances of symbiotic SVs in acantharian hosts
HostSymRA<- ggplot(msyms, aes(x=variable, y=value, fill=id2)) +
geom_bar(stat="identity") +
xlab("") +
ylab("") +
theme_bw()+
scale_x_discrete(limits = xorder, labels = xnames)+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
theme(legend.title=element_blank()) +
scale_y_continuous(expand = c(0, 0)) +
scale_fill_manual(values=colors, labels = barorder)
Figure 2
Legend for Figures 2 and 3
g_legend<-function(a.gplot){
tmp <- ggplot_gtable(ggplot_build(a.gplot))
leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
legend <- tmp$grobs[[leg]]
legend
}
legend <- g_legend(HostSymRA)
SVlegend <- grid.arrange(legend)
#count number rof non-zero rows for each column to get number of symbiotic SVs per host
nonzero <- function(x) sum(x != 0)
Svscnt<-as.data.frame(numcolwise(nonzero)(syms))
avg = as.integer(apply(Svscnt,1,mean,na.rm=TRUE)) +1
sd = as.integer(apply(Svscnt,1,sd,na.rm=TRUE))
min= apply(Svscnt,1,min,na.rm=TRUE)
max= apply(Svscnt,1,max,na.rm=TRUE)
## [1] "The mean number of SVs per host is 8 +/- 2 (standard deviation)."
## [1] "There are 4 to 12 SVs per host."
Load data and prepare dataframes to be imported as phyloseq objects.
filters <- read.delim("feature-table-filters.txt")
#filter feature table to include only the 0.2 um filters
names.use <- c("OTU","st2.DNA.0.2.1","st2.DNA.0.2.2","st4.DNA.0.2.1","st4.DNA.0.2.2","st10.DNA.0.2.1","st10.DNA.0.2.2","st12.DNA.0.2.1","st12.DNA.0.2.2","st13.DNA.0.2.1","st13.DNA.0.2.2","st17.DNA.0.2.1","st17.DNA.0.2.2" )
filters <- filters[,names.use]
Filter dataframe to contain the symbiotic SVs from acantharian hosts.
row.names(filters) <- filters$OTU
filters<- filters[,-(1)]
filters$row <- row.names(filters)
new<- merge(MiraiAs, filters, by = "row") #MiraiAs is dataframe of counts of symbiont SVs in the acantharians collected on Mirai
Convert filtered feature table to OTU phyloseq object.
row.names(new)<-new$row
new <- new[,-1]
fmat <- as.matrix(new)
OTU = otu_table(fmat, taxa_are_rows = TRUE)
physeq<- phyloseq(OTU, TAX, META, tree) #The taxonomy, metadata, and phylogenetic tree are the same as for the acantharian samples
Transform count data to relative abundance (as %) to normalize for differences in library size.
physeqPra<- transform_sample_counts(physeq, function(OTU) OTU/sum(OTU))
Determine the Bray-Curtis distances between samples and perform Principal Coordinate Analysis (PCoA). Plot the PCoA.
ordu = ordinate(physeqPra, "PCoA", "bray")
colors <- c("black", "#0F52BA")
p1<-plot_ordination(physeqPra, ordu, color="Source", shape = "Location") + geom_point(size=3.5)+theme_bw() +scale_color_manual(values=colors) +
theme(text=element_text(size=16, family="serif"))
PERMANOVA with Vegan function adonis
by sample source (Environment or Host):
OTUs <- t(data.frame(otu_table(physeqPra))) #get data frame of OTUs from phyloseq object
mirai<- c("st2", "st4", "st10", "st12", "st13", "st17")
meta <- metatable[metatable$Location %in% mirai,] #make sure metadata only has the samples included in the feature table
metaA <- meta[meta$Source == "host",]
metaF <- meta[meta$Source == "environment",]
names.use <- c("st2.DNA.0.2.1","st2.DNA.0.2.2","st4.DNA.0.2.1","st4.DNA.0.2.2","st10.DNA.0.2.1","st10.DNA.0.2.2","st12.DNA.0.2.1","st12.DNA.0.2.2","st13.DNA.0.2.1","st13.DNA.0.2.2","st17.DNA.0.2.1","st17.DNA.0.2.2" )
metaF<- metaF[names.use,]
meta <- rbind(metaA, metaF)
set.seed(1)
adonis(vegdist(OTUs, method = "bray") ~ Source, data = meta)
##
## Call:
## adonis(formula = vegdist(OTUs, method = "bray") ~ Source, data = meta)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Source 1 2.1458 2.14576 19.378 0.32634 0.001 ***
## Residuals 40 4.4294 0.11073 0.67366
## Total 41 6.5751 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
keep<-MiraiAs
keep$id2<-id
keep <- data.frame(keep[,c(31,32)])
envRA <-merge(filters, keep, by = "row")
row.names(envRA) <- envRA$id2
envRA <- envRA[,-c(1,14)]
sumcols <- colSums(envRA)
for(i in 1:ncol(envRA)) {
envRA[[i]]<- (envRA[[i]]/sumcols[[i]])*100
}
envRA$id2 <- row.names(envRA)
menvRA<- melt(envRA, id.vars="id2")
# put samples in more logical order on x axis and change label names
filterxorder = c("st2.DNA.0.2.1","st2.DNA.0.2.2","st4.DNA.0.2.1","st4.DNA.0.2.2","st10.DNA.0.2.1","st10.DNA.0.2.2","st12.DNA.0.2.1","st12.DNA.0.2.2","st13.DNA.0.2.1","st13.DNA.0.2.2","st17.DNA.0.2.1","st17.DNA.0.2.2" )
filterxnames = c("st2.1","st2.2","st4.1","st4.2","st10.1","st10.2","st12.1","st12.2","st13.1","st13.2","st17.1","st17.2" )
colors <- c("#cd4a4a", "#9f1c3b", "#790a1a", "#878787", "#5e5e5e", "#292929", "#95cde8", "#4897d8", "#217ca3", "#bbfaff", "#3aa6a9", "#1b8085","#fff671", "#ffd662", "#f5dfb8", "#f5ba98", "#f77d75", "#bedb92", "#77c063", "#569358", "#065900")
envSymRA<- ggplot(menvRA, aes(x=variable, y=value, fill=id2)) +
geom_bar(stat="identity") +
xlab("") +
ylab("") +
scale_x_discrete(limits = filterxorder, labels = filterxnames)+
theme_bw()+
theme(axis.text.x = element_text(angle = 45, hjust = 1))+
scale_fill_manual(values=colors)+
theme(legend.title=element_blank()) +
scale_y_continuous(expand = c(0, 0)) + theme(legend.position="none")
Figure 3
How good are the replicates when condsidering the entire environmental microbial eukaryote community?
filters <- read.delim("feature-table-filters.txt")
#filter feature table to include only the 0.2 um filters
names.use <- c("OTU","st2.DNA.0.2.1","st2.DNA.0.2.2","st4.DNA.0.2.1","st4.DNA.0.2.2","st10.DNA.0.2.1","st10.DNA.0.2.2","st12.DNA.0.2.1","st12.DNA.0.2.2","st13.DNA.0.2.1","st13.DNA.0.2.2","st17.DNA.0.2.1","st17.DNA.0.2.2" )
filters <- filters[,names.use]
#convert feature table to OTU phyloseq object
row.names(filters)<- filters[[1]]
filters<-filters[,-(1)]
fmat <- as.matrix(filters)
Convert to phyloseq object
OTU = otu_table(fmat, taxa_are_rows = TRUE)
ps<- phyloseq(OTU, TAX, META, tree)
prevdf = apply(X = otu_table(ps),
MARGIN = ifelse(taxa_are_rows(ps), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
prevdf = data.frame(Prevalence = prevdf,
TotalAbundance = taxa_sums(ps),
tax_table(ps))
plyr::ddply(prevdf, "D2", function(df1){cbind(mean(df1$Prevalence),sum(df1$Prevalence))})
## D2 1 2
## 1 Ambiguous_taxa 5.4615385 213
## 2 D_2__Alveolata 1.6718223 5945
## 3 D_2__Ancyromonadida 0.0000000 0
## 4 D_2__Apusomonadidae 0.0000000 0
## 5 D_2__Chloroplastida 1.2911392 102
## 6 D_2__Cryptomonadales 0.6666667 6
## 7 D_2__Discoba 0.1111111 1
## 8 D_2__Discosea 0.0000000 0
## 9 D_2__Holozoa 0.5857143 123
## 10 D_2__Kathablepharidae 0.2222222 2
## 11 D_2__Nucletmycea 0.0000000 0
## 12 D_2__P1-31 6.0000000 12
## 13 D_2__Palpitomonas 1.8965517 55
## 14 D_2__Pav3 0.0000000 0
## 15 D_2__Pavlovophyceae 1.0000000 1
## 16 D_2__Prymnesiophyceae 2.4438202 870
## 17 D_2__Rhizaria 0.3233256 140
## 18 D_2__Rhodophyceae 0.0000000 0
## 19 D_2__Stramenopiles 1.7767857 398
## 20 D_2__Telonema 0.9354839 58
## 21 D_2__uncultured eukaryote 1.9090909 21
## 22 <NA> 0.7053790 577
envPrevplot<- ggplot(prevdf, aes(TotalAbundance, Prevalence / nsamples(ps),color=D1)) + geom_hline(yintercept = 0.05, alpha = 0.5, linetype = 2) + geom_point(size = 2, alpha = 0.7) +theme_bw()+
scale_x_log10() + xlab("Total Abundance") + ylab("Prevalence [Frac. Samples]") +
facet_wrap(~D1) + theme(legend.position="none")
## Warning: Transformation introduced infinite values in continuous x-axis
Apply Prevalence Filter
prevalenceThreshold = 0.05 * nsamples(ps)
keepTaxa = rownames(prevdf)[(prevdf$Prevalence >= prevalenceThreshold)]
ps2 = prune_taxa(keepTaxa, ps)
Remove SVs without Phylum classification.
keeps<- c("D_1__Archaeplastida", "D_1__Cryptophyceae", "D_1__Haptophyta", "D_1__Incertae Sedis", "D_1__Opisthokonta", "D_1__Picozoa", "D_1__SAR")
ps2 <- subset_taxa(ps2, D1 %in% keeps)
table(tax_table(ps2)[, "D3"], exclude = NULL)
##
## Ambiguous_taxa D_3__
## 18 33
## D_3__Bicosoecida D_3__Cercozoa
## 2 13
## D_3__Chlorophyta D_3__Choanoflagellida
## 37 26
## D_3__Ciliophora D_3__Dinoflagellata
## 233 195
## D_3__Labyrinthulomycetes D_3__Leucocryptos
## 3 2
## D_3__MAST-1 D_3__MAST-11
## 3 2
## D_3__MAST-3 D_3__MAST-7
## 7 11
## D_3__MAST-8 D_3__MAST-9
## 1 3
## D_3__Metazoa (Animalia) D_3__Ochrophyta
## 24 23
## D_3__Pavlova D_3__Phaeocystis
## 1 5
## D_3__Protalveolata D_3__Prymnesiales
## 1387 172
## D_3__Retaria D_3__RM2-SGM51
## 44 3
## D_3__uncultured eukaryote <NA>
## 29 268
Transform count data to relative abundance (as %) to normalize for differences in library size.
ps2ra<- transform_sample_counts(ps2, function(OTU) OTU/sum(OTU))
Determine the Bray-Curtis distances between samples and perform Principal Coordinate Analysis (PCoA). Plot the PCoA.
cbPalette <- c("#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
ordu = ordinate(ps2ra, "PCoA", "bray")
p3<-plot_ordination(ps2ra, ordu, color="Location") + geom_point(size=3.5) +scale_color_manual(values=cbPalette) +theme_bw() +
theme(text=element_text(size=16, family="serif"))