Getting started

Packages, colors, functions

library(qiime2R)
library(CoDaSeq)
library(phyloseq)
library(lubridate)
library(tidyverse)
library(patchwork)
library(SpiecEasi)
library(vegan)
library(scales)

`%ni%` = Negate(`%in%`)

source("legendplot.R")

colors=c('#e9e9e9','#C14450','#f0b08f','#c2aba6','#60555f','#3c6481','#9fd6e6','#256F64','#63a375')

red<- c("#EB8B8E","#FBD5E2","#E7B6AF","#AC6873", "#D82354")
orange <- c("#FAAA6D","#FECF92")
yellow <- c("#FFC317","#F7F4B7", "#CC9C3C")
green <- c("#16866F","#1E3F1C","#99A339","#516A65","#8BC89F")
blue <- c("#005694","#B7E1DD","#66879E","#1BAAE2","#5FC8D8")
purple <- c("#E7D7CE","#A699A9","#434582","#81347D", "#B5218E")

colors30 <- c(blue, purple, red, yellow, orange, green, "black")

scripts <- c("graphical_methods.R",
             "tree_methods.R",
             "plot_merged_trees.R",
             "specificity_methods.R",
             "ternary_plot.R",
             "richness.R",
             "edgePCA.R",
             "copy_number_correction.R",
             "import_frogs.R",
             "prevalence.R",
             "compute_niche.R")
urls <- paste0("https://raw.githubusercontent.com/mahendra-mariadassou/phyloseq-extended/master/R/", scripts)

for (url in urls) {
  source(url)
}

Sample Metadata

samplemap<- read.csv("meta.csv")
samplemap$date <- dmy(samplemap$date)

## Warning: 5 failed to parse.

samplemap$Month <- as.factor(as.numeric(samplemap$Month))

## Warning in is.factor(x): NAs introduced by coercion

samplemap <- samplemap %>% mutate(Day=day(date)) %>% unite(Name, c(LandUse, Number, Position, Month, Day), sep = "_", remove = FALSE)

samplemap$Position <- factor(samplemap$Position, levels =c("S", "C", "N"))
samplemap$LandUse <- factor(samplemap$LandUse, levels = c("U", "R"))

str(samplemap)

## 'data.frame':    297 obs. of  10 variables:
##  $ SampleID: chr  "R1C-2020-09-17-1" "R1C-2020-10-01-49" "R1C-2020-10-15-51" "R1C-2020-10-29-66" ...
##  $ SS      : chr  "R1C" "R1C" "R1C" "R1C" ...
##  $ date    : Date, format: "2020-09-17" "2020-10-01" ...
##  $ Name    : chr  "R_1_C_9_17" "R_1_C_10_1" "R_1_C_10_15" "R_1_C_10_29" ...
##  $ LandUse : Factor w/ 2 levels "U","R": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Number  : chr  "1" "1" "1" "1" ...
##  $ Position: Factor w/ 3 levels "S","C","N": 2 2 2 2 2 2 2 3 3 3 ...
##  $ Month   : Factor w/ 12 levels "1","2","3","4",..: 9 10 10 10 11 12 12 9 10 10 ...
##  $ Area    : chr  "R1" "R1" "R1" "R1" ...
##  $ Day     : int  17 1 15 29 12 10 24 17 1 29 ...

Sequence processing stats

denoise_stats1 <- read.csv("denoising_stats1.tsv", header = TRUE, sep = "\t")[-1,]
print(dim(denoise_stats1))

## [1] 80  9

denoise_stats2 <- read.csv("denoising_stats2.tsv", header = TRUE, sep = "\t")[-1,]
print(dim(denoise_stats2))

## [1] 96  9

denoise_stats3 <- read.csv("denoising_stats3.tsv", header = TRUE, sep = "\t")[-1,]
print(dim(denoise_stats3))

## [1] 96  9

denoise_stats4 <- read.csv("denoising_stats4.tsv", header = TRUE, sep = "\t")[-1,]
print(dim(denoise_stats4))

## [1] 25  9

denoise_stats <- rbind(denoise_stats1, denoise_stats2, denoise_stats3, denoise_stats4 )
names(denoise_stats)[1] <- "SampleID"

denoise_stat_w_meta <- merge(denoise_stats, samplemap, by = "SampleID") %>% filter(Number != "s")
denoise_stat_w_meta$input <- as.numeric(denoise_stat_w_meta$input)
denoise_stat_w_meta$filtered <- as.numeric(denoise_stat_w_meta$filtered)
denoise_stat_w_meta$denoised <- as.numeric(denoise_stat_w_meta$denoised)
denoise_stat_w_meta$merged <- as.numeric(denoise_stat_w_meta$merged)
denoise_stat_w_meta$non.chimeric <- as.numeric(denoise_stat_w_meta$non.chimeric)

dim(denoise_stat_w_meta)

## [1] 292  18

Numbers of sequencing reads

minimum number of reads per sample:

min(denoise_stat_w_meta$input)

## [1] 2232

denoise_stat_w_meta %>% filter(input ==2232)

##         SampleID input filtered percentage.of.input.passed.filter denoised
## 1 U2N-28-07-2021  2232      164                              7.35      147
##   merged percentage.of.input.merged non.chimeric
## 1     17                       0.76           17
##   percentage.of.input.non.chimeric  SS       date       Name LandUse Number
## 1                             0.76 U2N 2021-07-28 U_2_N_7_28       U      2
##   Position Month Area Day
## 1        N     7   U2  28

Minimum number of reads per sample without the outlier:

denoise_stat_w_meta <- denoise_stat_w_meta %>% filter(input !=2232)
min(denoise_stat_w_meta$input)

## [1] 62656

total seqs generated:

sum(denoise_stat_w_meta$input)

## [1] 53199149

mean number of seqs per sample:

mean(denoise_stat_w_meta$input)

## [1] 182814.9

maximum number of seqs per sample

max(denoise_stat_w_meta$input)

## [1] 612998

total number of sequences remaining after denoising :

sum(denoise_stat_w_meta$denoised)

## [1] 23441681

mean number of seqs per sample after denoising:

mean(denoise_stat_w_meta$denoised)

## [1] 80555.6

maximum number of seqs per sample after denoising:

max(denoise_stat_w_meta$denoised)

## [1] 215532

minimum number of seqs per sample after denoising:

min(denoise_stat_w_meta$denoised)

## [1] 3272

Plot for sequencing stats

#seqtable <- select(denoise_stat_w_meta)

longseqtable <- denoise_stat_w_meta %>% pivot_longer(cols = c("input", "filtered", "denoised", "merged", "non.chimeric"), names_to = "seqtype", values_to = "number")

longseqtable$seqtype <- factor(longseqtable$seqtype, levels = c("input", "filtered", "denoised", "merged", "non.chimeric"))

ggplot(longseqtable, aes(x = seqtype, y = number)) + theme_test() +geom_boxplot() + facet_wrap(~LandUse) + scale_y_continuous(labels = comma)+ ylab("Sequence Counts") + theme(axis.text.x = element_text(angle = 45, vjust = 0.95, hjust=1)) +xlab("")

Load Data

Metadata

imported and formatted above, just convert to phyloseq object:

row.names(samplemap)<- samplemap$SampleID
META <- sample_data(samplemap)

ASVs

ps<-qza_to_phyloseq(features="merged_table.qza")

Taxonomy (Silva)

taxonomy <- read.csv("16taxonomy.csv", stringsAsFactors = FALSE, header = FALSE)
names(taxonomy) <- c("row", "tax", "Confidence") #change the headers (column names)
row.names(taxonomy) <-taxonomy[[1]] #move the feature ID column to become the row names
taxonomy <- taxonomy[,(-1)] #delete the feature ID  column 
taxonomy <-  separate(taxonomy, tax, c("Domain","Phylum", "Class", "Order", "Family", "Genus", "Species", "D7", "D8", "D9", "D10", "D11", "D12", "D13", "D14"), sep = ";", fill = "right")
taxonomy <- taxonomy[,c(1:7)]

taxonomy$D0 <- with(taxonomy, ifelse(Order == "D_3__Chloroplast", "Chloroplast", "Bacteria"))

col_order<- c("D0", "Domain","Phylum", "Class", "Order", "Family", "Genus", "Species" )
taxonomy<- taxonomy[, col_order]

taxmat <- as.matrix(taxonomy)
TAX = tax_table(taxmat)

Make complete phyloseq object:

ps = merge_phyloseq(ps, TAX, META)

Plot & filter Chloroplast 16S rRNA sequences

Plot relative abundance as stacked bar plot (all samples including chloroplast sequences):

ps<- subset_taxa(ps, D0 %in% c("Bacteria", "Chloroplast") )
psra<- transform_sample_counts(ps, function(OTU) 100* OTU/sum(OTU))
glomD1<- tax_glom(psra, 'D0')


taxabarplot<-plot_bar(glomD1, x= "SampleID", fill = "D0") +  scale_y_continuous(expand = c(0, 0)) + ggtitle("")  + theme(legend.title=element_blank()) + geom_bar(aes(fill=D0), stat="identity", position="stack", width =0.9) +theme_classic() + theme(text = element_text(size=14))+theme(axis.text.x = element_text(angle = 90)) + xlab("Sample") + ylab("Relative Abundance (%)") + theme(text = element_text(size=14)) + scale_fill_manual(values=c("lightgrey", "#7BB03B"), name = "") + xlab("") 

taxabarplot

Low chloroplast abundances.

Remove Chloroplast ASVs:

ps_nochloro <- subset_taxa(ps, Order != "D_3__Chloroplast" & Domain != "Unassigned")
ps_nochloro <- subset_samples(ps_nochloro , LandUse %in% c("U", "R") )
ps_nochloro <- subset_samples(ps_nochloro , SampleID !="U2N-28-07-2021") #remove lowest read number sample

Rarefaction Curves

allrare <- ggrare(ps_nochloro, step = 1000, color = "LandUse", se = FALSE)  + geom_line(size = 0.5) + theme(legend.position="none") + xlim(0, 10000)+ scale_color_manual(values = c("#256F64", "#C3D279", "grey"))+ theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank())+  theme(legend.position="none")+xlab("") +ylab("") + scale_x_continuous(labels = comma)

allrare

Zoom in on the low-read-number / low-diversity region of the plot:

rare_inset <- prune_samples(sample_sums(ps_nochloro)<=25000, ps_nochloro)

rareinset<- ggrare(rare_inset, step = 10, color = "LandUse", se = FALSE) + geom_line(size = 0.5) + theme(legend.position="none") + xlim(0, 10000)+ scale_color_manual(values = c("#256F64", "#C3D279", "grey"))+ theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank())+  theme(legend.position="right")+xlab("") +ylab("") + scale_x_continuous(labels = comma)

rareinset

Put the two plots next to each other (Figure S2):

(allrare + rareinset)

ggsave(“rarefaction curves.pdf”, width = 10, height = 5)

Alpha diversity

Richness (number of observed unique ASVs):

Total number of unique ASVs in the data set:

totalOTU <- data.frame(otu_table(ps_nochloro))
totalOTU$rowsu <- rowSums(totalOTU)
totalOTUnotzero <- totalOTU %>% filter(rowsu >1)
dim(totalOTUnotzero)

## [1] 53358   292

calculate richness for each sample:

library(breakaway)
library(magrittr)

## 
## Attaching package: 'magrittr'

## The following object is masked from 'package:purrr':
## 
##     set_names

## The following object is masked from 'package:tidyr':
## 
##     extract

plugin <- ps_nochloro %>%
            estimate_richness(measures = "Observed") %$% Observed
LandUse <- ps_nochloro %>% sample_data %$% LandUse
Number <- ps_nochloro %>% sample_data %$% Number
Position <- ps_nochloro %>% sample_data %$% Position
Area <- ps_nochloro %>% sample_data %$% Area
Month <- ps_nochloro %>% sample_data %$% Month
Day <-  ps_nochloro %>% sample_data %$% Day

richness<- data.frame(plugin, LandUse, Number, Position, Area, Month, Day)
names(richness) <- c("richness", "LandUse", "Number", "Position", "Area", "Month", "Day")


richness %>%group_by(LandUse, Month) %>% summarize(mean = mean(richness), min = min(richness), max = max(richness))

## `summarise()` has grouped output by 'LandUse'. You can override using the
## `.groups` argument.

## # A tibble: 24 × 5
## # Groups:   LandUse [2]
##    LandUse Month   mean   min   max
##    <fct>   <fct>  <dbl> <dbl> <dbl>
##  1 U       1     1324.    641  2787
##  2 U       2     1018.    593  1694
##  3 U       3      949.    494  1716
##  4 U       4      846      91  1560
##  5 U       5       83.7    15   146
##  6 U       6       87.4    56   121
##  7 U       7       78.8    46   116
##  8 U       8      201.     51   624
##  9 U       9      787.    211  1425
## 10 U       10    1251.    640  2252
## # ℹ 14 more rows

Plot richness as a box plot:

rich<- richness 
rich$Month <- as.factor(as.numeric(rich$Month))

RichPlot<- rich %>%  ggplot( aes(x=Month, y=richness, fill = LandUse))+ facet_grid(~ LandUse, scales = "free") +geom_boxplot() + theme_bw() + scale_fill_manual(values = c("#256F64", "#C3D279", "#75D1B7", "#256F64", "black")) + theme(text = element_text(size=14)) +ylab("Observed Richness") +ggtitle("")+ theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank()) +xlab("") 

RichPlot

minimum richness (sample with lowest # of unique ASVs):

min(rich$richness)

## [1] 15

maximum richness (sample with highest # of unique ASVs):

max(rich$richness)

## [1] 2787

mean richness across all samples:

mean(rich$richness)

## [1] 636.1237

Plot individual richness data points:

RichPlot<- rich %>%  ggplot( aes(x=Month, y=richness))+ facet_grid(~ LandUse, scales = "free") +geom_point(aes(fill = Position, shape = Number), alpha = 0.7, size = 3) + theme_bw() +  scale_fill_manual(values = c("#005694","#D82354", "#FFC317"))  + theme(text = element_text(size=14)) +ylab("Observed Richness") +ggtitle("")+ theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank()) + scale_shape_manual(values= c(21,24)) +guides(fill = guide_legend(override.aes=list(shape=21)))

RichPlot

Plot individual data points with mean and standard deviation (Figure S3 A):

df_mean <- data.frame(rich %>% 
  group_by(LandUse, Month) %>%
  dplyr::summarize(meanRichness = mean(richness), sd = sd(richness)) %>% ungroup())

## `summarise()` has grouped output by 'LandUse'. You can override using the
## `.groups` argument.

df_sd <- data.frame(rich %>% 
  group_by(LandUse, Month) %>%
  dplyr::summarize(meanRichness = mean(richness), sd = sd(richness)) %>% ungroup()  %>% mutate(ymin = meanRichness-sd) %>% mutate(ymax =meanRichness+sd))

## `summarise()` has grouped output by 'LandUse'. You can override using the
## `.groups` argument.

df_points <- rich
  

ggplot() + theme_bw() + geom_point(data =df_points, aes(x =Month, y = richness, color = Position, shape = Number), size = 2) + geom_errorbar(data = df_sd, aes(x = Month, y = meanRichness, ymin=ymin, ymax=ymax), width=0)  +facet_grid(~LandUse) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + scale_color_manual(values = c("#005694","#D82354", "#FFC317")) +ylab("Observed Richness (unique ASVs)") + scale_shape_manual(values= c(21,24)) + geom_point(data = df_mean, aes(x = Month, y = meanRichness), size =10, shape ="-")

ggsave(“richness_means.pdf”, width = 8, height = 4)

Significance test for differences between U and R samples each month (Table S1):

rich$LandUse_Month <- paste0(rich$LandUse, rich$Month)

pwt<- pairwise.wilcox.test(rich$richness, rich$LandUse_Month, paired = FALSE, p.adjust = "none") 

str(pwt)

## List of 4
##  $ method         : chr "Wilcoxon rank sum test with continuity correction"
##  $ data.name      : chr "rich$richness and rich$LandUse_Month"
##  $ p.value        : num [1:23, 1:23] 0.2212 0.4491 0.2512 0.8328 0.0908 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:23] "R10" "R11" "R12" "R2" ...
##   .. ..$ : chr [1:23] "R1" "R10" "R11" "R12" ...
##  $ p.adjust.method: chr "none"
##  - attr(*, "class")= chr "pairwise.htest"

pwt_df<- pwt$p.value

write.csv(pwt_df, "pwt_df.csv")

Shannon Index

Calculate Shannon index for all samples, plot box plots:

plugin <- ps_nochloro  %>%
            estimate_richness(measures = "Shannon") %$% Shannon

shannon<- data.frame(plugin, LandUse, Number, Position, Area, Month, Day)
names(shannon) <- c("Shannon", "LandUse", "Number", "Position", "Area", "Month", "Day")

shannon$Month <- as.factor(as.numeric(shannon$Month))

shannPlot<- shannon %>%  ggplot( aes(x=Month, y=Shannon, fill = LandUse))+ facet_grid(~ LandUse, scales = "free") +geom_boxplot() + theme_bw() + scale_fill_manual(values = c("#256F64", "#C3D279", "#75D1B7", "black")) + theme(text = element_text(size=14)) +ylab("Shannon Index") +ggtitle("")+ theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank()) +xlab("") 

shannPlot

Plot individual data points with mean and standard deviation (Figure S3 B):

df_mean <- data.frame(shannon %>% 
  group_by(LandUse, Month) %>%
  dplyr::summarize(meanShannon = mean(Shannon), sd = sd(Shannon)) %>% ungroup())

## `summarise()` has grouped output by 'LandUse'. You can override using the
## `.groups` argument.

df_sd <- data.frame(shannon %>% 
  group_by(LandUse, Month) %>%
  dplyr::summarize(meanShannon = mean(Shannon), sd = sd(Shannon)) %>% ungroup()  %>% mutate(ymin = meanShannon-sd) %>% mutate(ymax =meanShannon+sd))

## `summarise()` has grouped output by 'LandUse'. You can override using the
## `.groups` argument.

df_points <- shannon
  

ggplot() + theme_bw() + geom_point(data =df_points, aes(x =Month, y = Shannon, color = Position, shape = Number), size = 2) + geom_errorbar(data = df_sd, aes(x = Month, y = meanShannon, ymin=ymin, ymax=ymax), width=0)  +facet_grid(~LandUse) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + scale_color_manual(values = c("#005694","#D82354", "#FFC317")) +ylab("Shannon Index") + scale_shape_manual(values= c(21,24)) + geom_point(data = df_mean, aes(x = Month, y = meanShannon), size =10, shape ="-")

ggsave(“shannon_means.pdf”, width = 8, height = 4)

Significance test for differences between U and R samples each month (Table S1):

shannon$LandUse_Month <- paste0(shannon$LandUse, shannon$Month)

pwt_shann<- pairwise.wilcox.test(shannon$Shannon, shannon$LandUse_Month, paired = FALSE, p.adjust = "none") 

pwt_shann_df<- pwt_shann$p.value

write.csv(pwt_shann_df, "pwt_shann_df.csv")

Beta Diversity

PCoA all together

Figure S4 A

field <- subset_samples(ps_nochloro, LandUse %in% c("R", "U"))


OTU4clr<- data.frame(t(data.frame(otu_table(field))))
row.names(OTU4clr) <- str_remove(row.names(OTU4clr), "X")
row.names(OTU4clr) <- gsub("\\.", "-", row.names(OTU4clr))
OTUs.clr <- codaSeq.clr(OTU4clr + 0.5, samples.by.row=TRUE)
OTU2 <- otu_table(as.matrix(OTUs.clr), taxa_are_rows = FALSE)

psCLR <- phyloseq(OTU2,TAX,META)

ordu = ordinate(psCLR, "PCoA", "euclidean")

p<-plot_ordination(psCLR, ordu)+theme_bw()  +  theme(text = element_text(size=14)) +  geom_hline(yintercept = 0, linetype = "dashed", color = "lightgrey") +  geom_vline(xintercept = 0, linetype = "dashed", color = "lightgrey") + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank()) +geom_point(aes(fill=Month, shape = LandUse), size =3)  + scale_shape_manual(values= c(21,24)) +scale_fill_manual(values=c(blue[1:3], purple[2:4], green[1:3], yellow[1:3])) +guides(fill = guide_legend(override.aes=list(shape=21)))
p

ggsave(“allSites_PCoA.pdf”, width= 6, height = 4)

Same ordination, but separated into two plots for urban and rural samples (Figure S4 B):

pf<-plot_ordination(psCLR, ordu)+theme_bw()  +  theme(text = element_text(size=14)) +  geom_hline(yintercept = 0, linetype = "dashed", color = "lightgrey") +  geom_vline(xintercept = 0, linetype = "dashed", color = "lightgrey") + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank()) +geom_point(aes(fill=Month, shape = LandUse), size =3)  + scale_shape_manual(values= c(21,24)) +scale_fill_manual(values=c(blue[1:3], purple[2:4], green[1:3], yellow[1:3])) +guides(fill = guide_legend(override.aes=list(shape=21))) + facet_grid(~LandUse)
pf

PERMANOVA

All samples by land use:

set.seed(1)

meta <- samplemap[row.names(samplemap) %in% row.names(OTU2),]

vdist = vegdist(OTU2, method = "euclidean")

adonis2(vdist ~ LandUse, data = meta)

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = vdist ~ LandUse, data = meta)
##           Df SumOfSqs      R2      F Pr(>F)    
## LandUse    1    56428 0.02869 8.5368  0.001 ***
## Residual 289  1910296 0.97131                  
## Total    290  1966725 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Urban and Rural samples are significantly different.

PCoA for Urban and Rural samples separately

Rural PCoA

Figure 5A:

rural <- subset_samples(ps_nochloro, LandUse %in% c("R"))

OTU4clr<- data.frame(t(data.frame(otu_table(rural))))
row.names(OTU4clr) <- str_remove(row.names(OTU4clr), "X")
row.names(OTU4clr) <- gsub("\\.", "-", row.names(OTU4clr))
OTUs.clr <- codaSeq.clr(OTU4clr + 0.5, samples.by.row=TRUE)
OTU2 <- otu_table(as.matrix(OTUs.clr), taxa_are_rows = FALSE)

psCLR <- phyloseq(OTU2,TAX,META)

ordu = ordinate(psCLR, "PCoA", "euclidean")

r<-plot_ordination(psCLR, ordu)+theme_bw()  +  theme(text = element_text(size=14)) +  geom_hline(yintercept = 0, linetype = "dashed", color = "lightgrey") +  geom_vline(xintercept = 0, linetype = "dashed", color = "lightgrey") + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank()) +geom_point(aes(fill=Month, shape = Number), size =3)  + scale_shape_manual(values= c(21,24, 22)) +scale_fill_manual(values=c(blue[1:3], purple[2:4], green[1:3], yellow[1:3])) +guides(fill = guide_legend(override.aes=list(shape=21))) + ggtitle("Rural Sites")
r

ggsave(“RuralSites_pCoA.pdf”, width= 6, height = 4)

with colorblind pallette

color-blind palette made based on Paul Tol’s Introductionto color schemes: https://personal.sron.nl/~pault/

cb <- c("#004488", "#33CCEE", "#6699CC", "#CC6677", "#882255", "#AA4499", "#009988", "#117733", "#999933", "#DDCC77", "#EE7733", "#CC3311")

r<-plot_ordination(psCLR, ordu)+theme_bw()  +  theme(text = element_text(size=14)) +  geom_hline(yintercept = 0, linetype = "dashed", color = "lightgrey") +  geom_vline(xintercept = 0, linetype = "dashed", color = "lightgrey") + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank()) +geom_point(aes(fill=Month, shape = Number), size =3)  + scale_shape_manual(values= c(21,24, 22)) +scale_fill_manual(values=cb) +guides(fill = guide_legend(override.aes=list(shape=21))) + ggtitle("Rural Sites")
r

Rural PERMANOVAs

Rural samples by month: (Table 1)

set.seed(1)

meta <- samplemap[row.names(samplemap) %in% row.names(OTU2),]

vdist = vegdist(OTU2, method = "euclidean")

adonis2(vdist ~ Month, data = meta)

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = vdist ~ Month, data = meta)
##           Df SumOfSqs      R2     F Pr(>F)    
## Month     11   235167 0.31861 5.696  0.001 ***
## Residual 134   502945 0.68139                 
## Total    145   738112 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Rural samples are significantly different between months.

Rural samples by site number (1 or 2): (Table 1)

adonis2(vdist ~ Number, data = meta)

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = vdist ~ Number, data = meta)
##           Df SumOfSqs      R2      F Pr(>F)   
## Number     1    16006 0.02169 3.1919  0.002 **
## Residual 144   722106 0.97831                 
## Total    145   738112 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Rural samples are significantly different between sites (1 or 2).

Rural samples by subsite (S, C, N): (Table 1)

adonis2(vdist ~ Position, data = meta)

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = vdist ~ Position, data = meta)
##           Df SumOfSqs      R2      F Pr(>F)
## Position   2     6979 0.00946 0.6825  0.992
## Residual 143   731132 0.99054              
## Total    145   738112 1.00000

Rural samples are NOT significantly different between subsites (S, C, N).

Urban PCoA

Figure 5 B

urban <- subset_samples(ps_nochloro, LandUse %in% c("U"))

OTU4clr<- data.frame(t(data.frame(otu_table(urban))))
row.names(OTU4clr) <- str_remove(row.names(OTU4clr), "X")
row.names(OTU4clr) <- gsub("\\.", "-", row.names(OTU4clr))
OTUs.clr <- codaSeq.clr(OTU4clr + 0.5, samples.by.row=TRUE)
OTU2 <- otu_table(as.matrix(OTUs.clr), taxa_are_rows = FALSE)

psCLR <- phyloseq(OTU2,TAX,META)

ordu = ordinate(psCLR, "PCoA", "euclidean")

u<-plot_ordination(psCLR, ordu)+theme_bw()  +  theme(text = element_text(size=14)) +  geom_hline(yintercept = 0, linetype = "dashed", color = "lightgrey") +  geom_vline(xintercept = 0, linetype = "dashed", color = "lightgrey") + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank()) +geom_point(aes(fill=Month, shape = Number), size =3)  + scale_shape_manual(values= c(21, 24)) +scale_fill_manual(values=c(blue[1:3], purple[2:4], green[1:3], yellow[1:3])) +guides(fill = guide_legend(override.aes=list(shape=21)))
u

ggsave(“UrbanSites_pCoA.pdf”, width= 6, height = 4)

with colorblind colors

u<-plot_ordination(psCLR, ordu)+theme_bw()  +  theme(text = element_text(size=14)) +  geom_hline(yintercept = 0, linetype = "dashed", color = "lightgrey") +  geom_vline(xintercept = 0, linetype = "dashed", color = "lightgrey") + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank()) +geom_point(aes(fill=Month, shape = Number), size =3)  + scale_shape_manual(values= c(21, 24)) +scale_fill_manual(values=cb) +guides(fill = guide_legend(override.aes=list(shape=21)))
u

Urban PERMANOVAs

Urban samples by month: (Table 1)

set.seed(1)

meta <- samplemap[row.names(samplemap) %in% row.names(OTU2),]

vdist = vegdist(OTU2, method = "euclidean")

adonis2(vdist ~ Month, data = meta)

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = vdist ~ Month, data = meta)
##           Df SumOfSqs      R2      F Pr(>F)    
## Month     11   250662 0.21384 3.2888  0.001 ***
## Residual 133   921522 0.78616                  
## Total    144  1172184 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Urban samples are significantly different between months.

Urban samples by site number (1 or 2): (Table 1)

adonis2(vdist ~ Number, data = meta)

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = vdist ~ Number, data = meta)
##           Df SumOfSqs      R2      F Pr(>F)    
## Number     1    49194 0.04197 6.2643  0.001 ***
## Residual 143  1122990 0.95803                  
## Total    144  1172184 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Urban samples are significantly different between sites (1 or 2).

Urban samples by subsite (S, C, N): (Table 1)

adonis2(vdist ~ Position, data = meta)

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = vdist ~ Position, data = meta)
##           Df SumOfSqs     R2      F Pr(>F)    
## Position   2    57081 0.0487 3.6344  0.001 ***
## Residual 142  1115103 0.9513                  
## Total    144  1172184 1.0000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Urban samples are significantly different between subsites (S, C, N).

Rel Abundance Bar Plots

all orders

ps_RA<- transform_sample_counts(field, function(OTU) 100* OTU/sum(OTU))

ps_RA_glomO<- tax_glom(ps_RA, 'Order')

ps_RA_glomO_F = filter_taxa(ps_RA_glomO , function(x) sum(x) > 1, TRUE)

taxabarplot<-plot_bar(ps_RA_glomO_F, x= "date", fill = "Order") +  scale_y_continuous(expand = c(0, 0)) + ggtitle("")  + theme(legend.title=element_blank()) + geom_bar(aes(fill=Order), stat="identity", position="stack", width =0.9) +theme_classic() + theme(text = element_text(size=14))+theme(axis.text.x = element_text(angle = 90)) + xlab("Sample") + ylab("Relative Abundance (%)") + theme(text = element_text(size=14)) + scale_fill_manual(values= rep( colors30, 30))+ xlab("")+ facet_grid(LandUse~Number+Position, scales = "free")

taxabarplot+ theme(legend.position="none")

Too many to plot …

taxabarplot

all orders that make up less than one percent of a sample get grouped as “<1% abund.” for that sample - if they are more abundant in another sample, they remain named in that sample: (Figure S6)

mergemelt<- psmelt(ps_RA_glomO)

mergemelt$Order<-str_sub(mergemelt$Order, 6, str_length(mergemelt$Order))

mergemelt$Order[mergemelt$Abundance < 1] <- "z< 1% abund."

mergemelt<- mergemelt %>% 
  filter(!str_detect(Order, "Candidatus")) %>% filter(Order != "") %>% filter(!str_detect(Order, "uncultured"))

mergemelt$datePCT <- as.POSIXct(mergemelt$date, format= "%Y-%m-$d")

spatial_plot <- ggplot(data=mergemelt, aes(x=datePCT, y=Abundance, fill=Order)) + facet_grid(LandUse~Number+Position)

spatial_plot + geom_bar(aes(), stat="identity", position="stack") + 
  theme_classic() + theme(legend.position="bottom") + scale_fill_manual(values = rep(colors30, 30)) +guides(fill=guide_legend(ncol=6)) + theme(legend.title =element_blank()) + scale_x_datetime(date_breaks = "1 month",labels = date_format("%b"))+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Bubble Plots

All orders

Easier way to visualize the relative abundance in all samples (Figure 6)

mergemelt <- mergemelt %>% filter(Order %ni% c("Candidatus Campbellbacteria" , "Candidatus Falkowbacteria",    "Candidatus Magasanikbacteria","Candidatus Moranbacteria"  ,   "Candidatus Nomurabacteria" ,   "Candidatus Peregrinibacteria", "", "uncultured" , "uncultured bacterium"  ))

mergemelt$LandUse <- factor(mergemelt$LandUse, levels = c("U", "R"))
mergemelt$Position <- factor(mergemelt$Position, levels = c("S", "C", "N"))


bubble <- mergemelt %>% ggplot(aes(x=date, y = Order))+ theme_bw()  + geom_point(aes(size = Abundance, fill = Month), shape = 21, color = "black")+
  theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")+ 
  facet_grid(~LandUse+Number+Position) +scale_fill_manual(values=c(blue[1:3], purple[2:4], green[1:3], yellow[1:3]))

bubble

ggsave(“allOrder_bubblesF.pdf”,height = 8, width = 11)

colorblind versions

bubble <- mergemelt %>% ggplot(aes(x=date, y = Order))+ theme_bw()  + geom_point(aes(size = Abundance, fill = Month), shape = 21, color = "black")+
  theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")+ 
  facet_grid(~LandUse+Number+Position) +scale_fill_manual(values=cb)

bubble

filtered rel abund bar plots (Orange, red, orange and red)

Orange orders

orders that are prevalent in urban samples but rare in rural samples:

Uorders <- mergemelt %>% filter(Order %in% c("Vibrionales", "Thiotrichales", "Sphingomonadales", "Chitinophagales", "Campylobacterales", "Betaproteobacteriales", "Bacteroidales", "Alteromonadales", "Aeromonadales"))


spatial_plot <- ggplot(data=Uorders, aes(x=date, y=Abundance, fill=Order)) + facet_grid(LandUse~Number+Position)

spatial_plot + geom_bar(aes(), stat="identity", position="stack") + 
  theme_classic() + theme(legend.position="right") + scale_fill_manual(values = rep(colors30, 30)) + theme(legend.title =element_blank())+ theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0))

ggsave(“orange_orders_barplot.pdf”, width = 8, height = 4)

Red Orders

orders that are found in at least 3 urban samples and are never found in rural samples:

red_orders <- mergemelt %>% filter(Order %in% c("Thiomicrospirales", "Sphingobacteriales", "Selenomondales", "Saccharimonadales", "Pseudomonadales", "Lactobacillales", "Clostridiales", "Cloacimonadales"))


spatial_plot <- ggplot(data=red_orders, aes(x=date, y=Abundance, fill=Order)) + facet_grid(LandUse~Number+Position)

spatial_plot + geom_bar(aes(), stat="identity", position="stack") + 
  theme_classic() + theme(legend.position="right") + scale_fill_manual(values = rev(colors30))  + theme(legend.title =element_blank()) +theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0))

ggsave(“red_orders_barplot.pdf”, width = 8, height = 3)

Red+Orange Orders

combined (Figure 7)

Red_orange_orders <- mergemelt %>% filter(Order %in% c("Vibrionales", "Thiotrichales","Thiomicrospirales", "Sphingomonadales", "Sphingobacteriales", "Selenomondales", "Saccharimonadales", "Pseudomonadales", "Lactobacillales", "Clostridiales", "Cloacimonadales", "Chitinophagales", "Campylobacterales", "Betaproteobacteriales", "Bacteroidales", "Alteromonadales", "Aeromonadales"))

spatial_plot <- ggplot(data=Red_orange_orders, aes(x=date, y=Abundance, fill=Order)) + facet_grid(LandUse~Number+Position)

spatial_plot + geom_bar(aes(), stat="identity", position="stack") + 
  theme_classic() + theme(legend.position="bottom") + scale_fill_manual(values = rep(colors30, 30)) +guides(fill=guide_legend(nrow=4)) + theme(legend.title =element_blank()) + theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0))

ggsave(“red_orange_orders_barplot.pdf”, width = 7, height = 5)

summary percents of red/orange orders

summary_percents <- Red_orange_orders %>% group_by(Sample, SS, Name, LandUse, Number, Position, Month, Area, Day) %>% summarise(total = sum(Abundance)) %>% group_by(LandUse, Number) %>% summarise(max= max(total), min = min(total), mean = mean(total))

## `summarise()` has grouped output by 'Sample', 'SS', 'Name', 'LandUse',
## 'Number', 'Position', 'Month', 'Area'. You can override using the `.groups`
## argument.
## `summarise()` has grouped output by 'LandUse'. You can override using the
## `.groups` argument.

summary_percents

## # A tibble: 4 × 5
## # Groups:   LandUse [2]
##   LandUse Number   max   min  mean
##   <fct>   <chr>  <dbl> <dbl> <dbl>
## 1 U       1      63.8   1.33 19.6 
## 2 U       2      57.9   1.15 15.4 
## 3 R       1       9.75  1.00  3.30
## 4 R       2       9.84  1.01  2.71

Network Analysis

Urban samples

Trying to get the same exact subset as the bars and bubble plots:

ps_glomO <- tax_glom(field, 'Order')
mergemeltO<- psmelt(ps_glomO)

names(mergemelt)[3] <- "RelAbundance"

mergemelt <- mergemelt %>% select(OTU, Sample, RelAbundance, Order, LandUse, Number)
mergemeltO <- mergemeltO %>%  select(OTU, Sample, Abundance, Order)
mergemeltO$Order<-str_sub(mergemeltO$Order, 6, str_length(mergemeltO$Order))

mergeO_together<- left_join(mergemelt, mergemeltO, by = c('OTU' = 'OTU', 'Sample' = 'Sample', 'Order' = 'Order'))

mergemeltU <- mergeO_together %>% filter(LandUse == "U") %>% filter(Order != "z< 1% abund.")


mergeU <- mergemeltU %>% select(Sample, Order, Abundance) %>% group_by(Sample, Order) %>% summarize(total = sum(Abundance))  %>% ungroup()

## `summarise()` has grouped output by 'Sample'. You can override using the
## `.groups` argument.

mergeU_wider<- mergeU%>%  pivot_wider(names_from = Order, values_from = total, values_fill = 0)

mergeU_wider[is.na(mergeU_wider)] <- 0

mergeU_widerdf <- data.frame(mergeU_wider) 

row.names(mergeU_widerdf)<- mergeU_widerdf$Sample
mergeU_widerdf<- mergeU_widerdf[, -1]

Rural samples

mergemeltR <- mergeO_together %>% filter(LandUse == "R") %>% filter(Order != "z< 1% abund.")

mergeR <- mergemeltR %>% select(Sample, Order, Abundance) %>% group_by(Sample, Order) %>% summarize(total = sum(Abundance))  %>% ungroup()

## `summarise()` has grouped output by 'Sample'. You can override using the
## `.groups` argument.

mergeR_wider<- mergeR%>%  pivot_wider(names_from = Order, values_from = total, values_fill = 0)

mergeR_wider[is.na(mergeR_wider)] <- 0

mergeR_widerdf <- data.frame(mergeR_wider) 

row.names(mergeR_widerdf)<- mergeR_widerdf$Sample
mergeR_widerdf<- mergeR_widerdf[, -1]

Rural Network

pargs2 <- list(rep.num=50, seed=10010, ncores=10)

OTU4corr_named_mat<- as.matrix(mergeR_widerdf)

se <- spiec.easi(OTU4corr_named_mat, method='glasso', lambda.min.ratio=1e-2,
                          nlambda=20, pulsar.params=pargs2)

## Applying data transformations...

## Selecting model with pulsar using stars...

## Fitting final estimate with glasso...

## done

se.mb <- spiec.easi(OTU4corr_named_mat, method='mb', lambda.min.ratio=1e-2,
                          nlambda=20, pulsar.params=pargs2)

## Applying data transformations...

## Selecting model with pulsar using stars...

## Fitting final estimate with mb...

## done

sparcc.oki<- sparcc(OTU4corr_named_mat)

sparcc.graph <- abs(sparcc.oki$Cor) >= 0.3
diag(sparcc.graph) <- 0
library(Matrix)

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:SpiecEasi':
## 
##     tril, triu

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

sparcc.graph <- Matrix(sparcc.graph, sparse=TRUE)

## Create igraph objects
ig.mb     <- adj2igraph(getRefit(se.mb))
ig.gl     <- adj2igraph(getRefit(se))
ig.sparcc <- adj2igraph(sparcc.graph)

Comparing different models for finding network connections:

library(igraph)

## 
## Attaching package: 'igraph'

## The following objects are masked from 'package:ape':
## 
##     degree, edges, mst, ring

## The following object is masked from 'package:vegan':
## 
##     diversity

## The following object is masked from 'package:permute':
## 
##     permute

## The following object is masked from 'package:SpiecEasi':
## 
##     make_graph

## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union

## The following objects are masked from 'package:purrr':
## 
##     compose, simplify

## The following object is masked from 'package:tidyr':
## 
##     crossing

## The following object is masked from 'package:tibble':
## 
##     as_data_frame

## The following objects are masked from 'package:lubridate':
## 
##     %--%, union

## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum

## The following object is masked from 'package:base':
## 
##     union

## set size of vertex proportional to clr-mean
vsize    <- clr(rowMeans(OTU4corr_named_mat, 1))+6
am.coord <- layout.fruchterman.reingold(ig.mb)

par(mfrow=c(1,3))
plot(ig.mb, layout=am.coord, vertex.size=vsize, vertex.label=NA, main="MB")

## Warning in layout[, 1] + label.dist * cos(-label.degree) * (vertex.size + :
## longer object length is not a multiple of shorter object length

## Warning in layout[, 2] + label.dist * sin(-label.degree) * (vertex.size + :
## longer object length is not a multiple of shorter object length

plot(ig.gl, layout=am.coord, vertex.size=vsize, vertex.label=NA, main="glasso")

## Warning in layout[, 1] + label.dist * cos(-label.degree) * (vertex.size + :
## longer object length is not a multiple of shorter object length

## Warning in layout[, 1] + label.dist * cos(-label.degree) * (vertex.size + :
## longer object length is not a multiple of shorter object length

plot(ig.sparcc, layout=am.coord, vertex.size=vsize, vertex.label=NA, main="sparcc")

## Warning in layout[, 1] + label.dist * cos(-label.degree) * (vertex.size + :
## longer object length is not a multiple of shorter object length

## Warning in layout[, 1] + label.dist * cos(-label.degree) * (vertex.size + :
## longer object length is not a multiple of shorter object length

Looking at number of vertices and edges in MB model:

V(ig.mb)

## + 29/29 vertices, named, from 27833e4:
##  [1] 1  2  3  4  5  6  7  8  9  10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29

E(ig.mb)

## + 18/18 edges from 27833e4 (vertex names):
##  [1]  1-- 6  3-- 7  3-- 8  6--10  6--12  6--21  6--26  6--27  6--28  6--29
## [11]  8--10 12--21 12--26 12--28 12--29 19--23 20--24 23--29

V(ig.gl)$name <- colnames(OTU4corr_named_mat)
V(ig.mb)$name <- colnames(OTU4corr_named_mat)

custombluegreen<-rgb(0/255,158/255,115/255)

V(ig.gl)$color <- "grey"

E(ig.gl)$color <- custombluegreen
#E(ig.gl)$color[E(ig.gl)$weight<0] <- customreddishpurple
E(ig.gl)$width <- abs(E(ig.gl)$weight)*2

V(ig.mb)$color <- "grey"

E(ig.mb)$color <- custombluegreen
#E(ig.mb)$color[E(ig.mb)$weight<0] <- customreddishpurple
E(ig.mb)$width <- abs(E(ig.mb)$weight)*2


par(mfrow=c(1,2))
plot(ig.gl, vertex.size=vsize, vertex.label.dist=1.1,
     vertex.label.cex=0.5,
     vertex.label.color="black", main="R1+R2 glasso", layout=am.coord)

## Warning in layout[, 1] + label.dist * cos(-label.degree) * (vertex.size + :
## longer object length is not a multiple of shorter object length

## Warning in layout[, 2] + label.dist * sin(-label.degree) * (vertex.size + :
## longer object length is not a multiple of shorter object length

plot(ig.mb, vertex.size=vsize, vertex.label.dist=1.1,
     vertex.label.cex=0.5,
     vertex.label.color="black", main="R1+R2 mb", layout=am.coord)

## Warning in layout[, 1] + label.dist * cos(-label.degree) * (vertex.size + :
## longer object length is not a multiple of shorter object length

## Warning in layout[, 1] + label.dist * cos(-label.degree) * (vertex.size + :
## longer object length is not a multiple of shorter object length

#layout=am.coord
#layout=layout_with_kk(ig.gl)

Figure 8B

plot(ig.mb, vertex.size=vsize, vertex.label.dist=1.1,
     vertex.label.cex=0.5,
     vertex.label.color="black", main="R1+R2 mb", layout=am.coord)

## Warning in layout[, 1] + label.dist * cos(-label.degree) * (vertex.size + :
## longer object length is not a multiple of shorter object length

## Warning in layout[, 2] + label.dist * sin(-label.degree) * (vertex.size + :
## longer object length is not a multiple of shorter object length

# pdf(file = "Rural_mb_network_sizescale.pdf",   
#      width = 8, 
#      height = 8)
# plot(ig.mb, vertex.size=vsize, vertex.label.dist=1.1,
#      vertex.label.cex=0.5,
#      vertex.label.color="black", main="R1+R2 mb", layout=am.coord)
# dev.off()

plot(density((E(ig.gl)$weight)),xlab="Edge Weight",main="")

Edges are primarily positive

Urban Network

pargs2 <- list(rep.num=50, seed=10010, ncores=10)

OTU4corr_named_mat<- as.matrix(mergeU_widerdf)

se <- spiec.easi(OTU4corr_named_mat, method='glasso', lambda.min.ratio=1e-2,
                          nlambda=20, pulsar.params=pargs2)

## Applying data transformations...

## Selecting model with pulsar using stars...

## Fitting final estimate with glasso...

## done

se.mb <- spiec.easi(OTU4corr_named_mat, method='mb', lambda.min.ratio=1e-2,
                          nlambda=20, pulsar.params=pargs2)

## Applying data transformations...

## Selecting model with pulsar using stars...

## Fitting final estimate with mb...

## done

sparcc.oki<- sparcc(OTU4corr_named_mat)

sparcc.graph <- abs(sparcc.oki$Cor) >= 0.3
diag(sparcc.graph) <- 0
library(Matrix)
sparcc.graph <- Matrix(sparcc.graph, sparse=TRUE)

## Create igraph objects
ig.mb     <- adj2igraph(getRefit(se.mb))
ig.gl     <- adj2igraph(getRefit(se))
ig.sparcc <- adj2igraph(sparcc.graph)

## set size of vertex proportional to clr-mean
vsize    <- clr(rowMeans(OTU4corr_named_mat, 1))+6
am.coord <- layout.fruchterman.reingold(ig.mb)

par(mfrow=c(1,3))
plot(ig.mb, layout=am.coord, vertex.size=vsize, vertex.label=NA, main="MB")

## Warning in layout[, 1] + label.dist * cos(-label.degree) * (vertex.size + :
## longer object length is not a multiple of shorter object length

## Warning in layout[, 2] + label.dist * sin(-label.degree) * (vertex.size + :
## longer object length is not a multiple of shorter object length

plot(ig.gl, layout=am.coord, vertex.size=vsize, vertex.label=NA, main="glasso")

## Warning in layout[, 1] + label.dist * cos(-label.degree) * (vertex.size + :
## longer object length is not a multiple of shorter object length

## Warning in layout[, 1] + label.dist * cos(-label.degree) * (vertex.size + :
## longer object length is not a multiple of shorter object length

plot(ig.sparcc, layout=am.coord, vertex.size=vsize, vertex.label=NA, main="sparcc")

## Warning in layout[, 1] + label.dist * cos(-label.degree) * (vertex.size + :
## longer object length is not a multiple of shorter object length

## Warning in layout[, 1] + label.dist * cos(-label.degree) * (vertex.size + :
## longer object length is not a multiple of shorter object length

V(ig.mb)

## + 51/51 vertices, named, from eb4460a:
##  [1] 1  2  3  4  5  6  7  8  9  10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
## [51] 51

E(ig.mb)

## + 48/48 edges from eb4460a (vertex names):
##  [1]  2-- 3  2--14  3--14  3--19  4-- 6  8--16  8--20  8--22  8--23  8--29
## [11]  8--41  8--42  8--45  8--47  8--50  8--51  9--10  9--11  9--21 10--37
## [21] 10--38 11--37 12--44 14--25 15--25 15--36 16--20 16--22 16--23 16--29
## [31] 16--45 16--47 16--51 18--21 20--22 20--23 20--47 20--51 21--26 22--47
## [41] 24--27 26--28 27--28 30--32 30--33 31--33 40--48 48--49

V(ig.gl)$name <- colnames(OTU4corr_named_mat)
V(ig.mb)$name <- colnames(OTU4corr_named_mat)

V(ig.gl)$color <- "grey"

E(ig.gl)$color <- custombluegreen
#E(ig.gl)$color[E(ig.gl)$weight<0] <- customreddishpurple
E(ig.gl)$width <- abs(E(ig.gl)$weight)*2

V(ig.mb)$color <- "grey"

E(ig.mb)$color <- custombluegreen
#E(ig.mb)$color[E(ig.mb)$weight<0] <- customreddishpurple
E(ig.mb)$width <- abs(E(ig.mb)$weight)*2


par(mfrow=c(1,2))
plot(ig.gl, vertex.size=vsize, vertex.label.dist=1.1,
     vertex.label.cex=0.5,
     vertex.label.color="black", main="U1+U2 glasso", layout=am.coord)

## Warning in layout[, 1] + label.dist * cos(-label.degree) * (vertex.size + :
## longer object length is not a multiple of shorter object length

## Warning in layout[, 2] + label.dist * sin(-label.degree) * (vertex.size + :
## longer object length is not a multiple of shorter object length

plot(ig.mb, vertex.size=vsize, vertex.label.dist=1.1,
     vertex.label.cex=0.5,
     vertex.label.color="black", main="U1+U2 mb", layout=am.coord)

## Warning in layout[, 1] + label.dist * cos(-label.degree) * (vertex.size + :
## longer object length is not a multiple of shorter object length

## Warning in layout[, 1] + label.dist * cos(-label.degree) * (vertex.size + :
## longer object length is not a multiple of shorter object length

#layout=am.coord
#layout=layout_with_kk(ig.gl)

Figure 8A

plot(ig.mb, vertex.size=vsize, vertex.label.dist=1.1,
     vertex.label.cex=0.5,
     vertex.label.color="black", main="U1+U2 mb", layout=am.coord)

## Warning in layout[, 1] + label.dist * cos(-label.degree) * (vertex.size + :
## longer object length is not a multiple of shorter object length

## Warning in layout[, 2] + label.dist * sin(-label.degree) * (vertex.size + :
## longer object length is not a multiple of shorter object length

# pdf(file = "Urban_mb_network_sizescale.pdf",   
#      width = 8, 
#      height = 8)
# plot(ig.mb, vertex.size=vsize, vertex.label.dist=1.1,
#      vertex.label.cex=0.5,
#      vertex.label.color="black", main="R1+R2 mb", layout=am.coord)
# dev.off()

plot(density((E(ig.gl)$weight)),xlab="Edge Weight",main="")

Bubble plots for individual orders

Look at which genera/species are abundant within orders of interest

Vibrionales

ps_RA_glomG<- tax_glom(ps_RA, 'Genus')
genusmelt <- psmelt(ps_RA_glomG)

genusmelt$Order<-str_sub(genusmelt$Order, 6, str_length(genusmelt$Order))

genusmelt <- genusmelt %>% filter(Order %in% c("Vibrionales", "Thiotrichales","Thiomicrospirales", "Sphingomonadales", "Sphingobacteriales", "Selenomondales", "Saccharimonadales", "Pseudomonadales", "Lactobacillales", "Clostridiales", "Cloacimonadales", "Chitinophagales", "Campylobacterales", "Betaproteobacteriales", "Bacteroidales", "Alteromonadales", "Aeromonadales")) 

genusmelt_group_filter <- genusmelt %>% group_by(LandUse, Number, Position, date, Order) %>% mutate(ordersum = sum(Abundance)) %>% filter(ordersum>=1)

genusmelt_group_filter <- genusmelt_group_filter  %>% filter(Abundance>0)

#mergemelt$Order[mergemelt$Abundance < 0.1] <- "z< 1% abund."

mergemelt_vibrio <- genusmelt_group_filter %>% filter(Order == "Vibrionales")


bubble <- mergemelt_vibrio %>% ggplot(aes(x=date, y = Genus))+ theme_bw()  + geom_point(aes(size = Abundance, fill = Month), shape = 21, color = "black")+
  theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")+ 
  facet_grid(~LandUse+Number+Position) +scale_fill_manual(values=c(blue[1:3], purple[2:4], green[1:3], yellow[1:3]))

bubble

Vibrionales species:

ps_RA_glomS<- tax_glom(ps_RA, 'Species')
speciesmelt <- psmelt(ps_RA_glomS)

speciesmelt$Order<-str_sub(speciesmelt$Order, 6, str_length(speciesmelt$Order))

speciesmelt <- speciesmelt %>% filter(Order %in% c("Vibrionales", "Thiotrichales","Thiomicrospirales", "Sphingomonadales", "Sphingobacteriales", "Selenomondales", "Saccharimonadales", "Pseudomonadales", "Lactobacillales", "Clostridiales", "Cloacimonadales", "Chitinophagales", "Campylobacterales", "Betaproteobacteriales", "Bacteroidales", "Alteromonadales", "Aeromonadales")) 

speciesmelt_group_filter <- speciesmelt %>% group_by(LandUse, Number, Position, date, Order) %>% mutate(ordersum = sum(Abundance)) %>% filter(ordersum>=1)

#mergemelt$Order[mergemelt$Abundance < 0.1] <- "z< 1% abund."

speciesmelt_vibrio <- speciesmelt_group_filter %>% filter(Order == "Vibrionales")

speciesmelt_vibrio <- speciesmelt_vibrio %>% filter(Abundance > 0)

bubble <- speciesmelt_vibrio %>% ggplot(aes(x=date, y = Species))+ theme_bw()  + geom_point(aes(size = Abundance, fill = Month), shape = 21, color = "black")+
  theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")+ 
  facet_grid(~LandUse+Number+Position) +scale_fill_manual(values=c(blue[1:3], purple[2:4], green[1:3], yellow[1:3]))

bubble

Campylobacterales

mergemelt_camp <- genusmelt_group_filter %>% filter(Order == "Campylobacterales")

bubble <- mergemelt_camp %>% ggplot(aes(x=date, y = Genus))+ theme_bw()  + geom_point(aes(size = Abundance, fill = Month), shape = 21, color = "black")+
  theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")+ 
  facet_grid(~LandUse+Number+Position) +scale_fill_manual(values=c(blue[1:3], purple[2:4], green[1:3], yellow[1:3]))

bubble

Campylobacterales species

speciesmelt_camp <- speciesmelt_group_filter %>% filter(Order == "Campylobacterales")

speciesmelt_camp <- speciesmelt_camp %>% filter(Abundance > 0)

bubble <- speciesmelt_camp %>% ggplot(aes(x=date, y = Species))+ theme_bw()  + geom_point(aes(size = Abundance, fill = Month), shape = 21, color = "black")+
  theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")+ 
  facet_grid(~LandUse+Number+Position) +scale_fill_manual(values=c(blue[1:3], purple[2:4], green[1:3], yellow[1:3]))

bubble

Betaproteobacteriales

mergemelt_betabact <- genusmelt_group_filter %>% filter(Order == "Betaproteobacteriales")

bubble <- mergemelt_betabact %>% ggplot(aes(x=date, y = Genus))+ theme_bw()  + geom_point(aes(size = Abundance, fill = Month), shape = 21, color = "black")+
  theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")+ 
  facet_grid(~LandUse+Number+Position) +scale_fill_manual(values=c(blue[1:3], purple[2:4], green[1:3], yellow[1:3]))

bubble

Betaproteobacteriales species

speciesmelt_betabact <- speciesmelt_group_filter %>% filter(Order == "Betaproteobacteriales")

speciesmelt_betabact <- speciesmelt_betabact %>% filter(Abundance > 0)

bubble <- speciesmelt_betabact %>% ggplot(aes(x=date, y = Family))+ theme_bw()  + geom_point(aes(size = Abundance, fill = Month), shape = 21, color = "black")+
  theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")+ 
  facet_grid(~LandUse+Number+Position) +scale_fill_manual(values=c(blue[1:3], purple[2:4], green[1:3], yellow[1:3]))

bubble

Bacteroidales

mergemelt_bacteroid <- genusmelt_group_filter %>% filter(Order == "Bacteroidales")

bubble <- mergemelt_bacteroid %>% ggplot(aes(x=date, y = Genus))+ theme_bw()  + geom_point(aes(size = Abundance, fill = Month), shape = 21, color = "black")+
  theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")+ 
  facet_grid(~LandUse+Number+Position) +scale_fill_manual(values=c(blue[1:3], purple[2:4], green[1:3], yellow[1:3]))

bubble

Clostridiales

mergemelt_clos <-genusmelt_group_filter%>% filter(Order == "Clostridiales") %>% filter(Abundance > 0)

bubble <-mergemelt_clos %>% ggplot(aes(x=date, y = Genus))+ theme_bw()  + geom_point(aes(size = Abundance, fill = Month), shape = 21, color = "black")+
  theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")+ 
  facet_grid(~LandUse+Number+Position) +scale_fill_manual(values=c(blue[1:3], purple[2:4], green[1:3], yellow[1:3]))

bubble

Pseudomonadales

mergemelt_pseudo <- genusmelt_group_filter %>% filter(Order == "Pseudomonadales")

bubble <-mergemelt_pseudo %>% ggplot(aes(x=date, y = Genus))+ theme_bw()  + geom_point(aes(size = Abundance, fill = Month), shape = 21, color = "black")+
  theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")+ 
  facet_grid(~LandUse+Number+Position) +scale_fill_manual(values=c(blue[1:3], purple[2:4], green[1:3], yellow[1:3]))

bubble

filter for pseudomonas + Acinetobacter genus, then species glom

ps_RA_pseudo <- subset_taxa(ps_RA, Genus %in% c("D_5__Pseudomonas", "D_5__Acinetobacter"))
ps_RA_glomS<- tax_glom(ps_RA_pseudo , 'Species')
pseudomelt <- psmelt(ps_RA_glomS)

bubble <-pseudomelt %>% ggplot(aes(x=date, y = Species))+ theme_bw()  + geom_point(aes(size = Abundance, fill = Month), shape = 21, color = "black")+
  theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")+ 
  facet_grid(~LandUse+Number+Position) +scale_fill_manual(values=c(blue[1:3], purple[2:4], green[1:3], yellow[1:3]))

bubble

Physicochemical data

CTD probe:

rink <- read.csv("mean_sd_rinko.csv")
str(rink)

## 'data.frame':    311 obs. of  25 variables:
##  $ X                : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ date             : chr  "2020/09/17" "2020/09/17" "2020/09/17" "2020/09/17" ...
##  $ location         : chr  "R1" "R1" "R1" "R2" ...
##  $ sample           : chr  "R1C" "R1N" "R1S" "R2C" ...
##  $ Position         : chr  "C" "N" "S" "C" ...
##  $ water_temp_Mean  : num  31.1 30.5 29.7 29.4 31.1 ...
##  $ water_temp_SD    : num  0.00734 0.01784 0.01009 0.02181 0.00896 ...
##  $ salinity_Mean    : num  28.5 34.2 33.5 28.7 33.2 ...
##  $ salinity_SD      : num  0.0112 0.02 0.0503 0.0171 0.0111 ...
##  $ conductivity_Mean: num  49.5 57.7 55.8 48.4 56.8 ...
##  $ conductivity_SD  : num  0.0122 0.0374 0.0709 0.0383 0.0169 ...
##  $ ec25_Mean        : num  43658 51432 50620 44118 50035 ...
##  $ ec25_SD          : num  15 27 68.3 23.3 15.9 ...
##  $ density_Mean     : num  1016 1021 1021 1017 1020 ...
##  $ density_SD       : num  0.0109 0.0164 0.0419 0.0129 0.0117 ...
##  $ sT_Mean          : num  16.5 20.9 20.7 17.2 20 ...
##  $ sT_SD            : num  0.0108 0.0164 0.042 0.0125 0.0117 ...
##  $ chl_f_Mean       : num  0.643 0.746 0.671 0.266 0.333 ...
##  $ chl_f_SD         : num  0.0227 0.052 0.0438 0.0185 0.0362 ...
##  $ chl_a_Mean       : num  0.643 0.746 0.671 0.266 0.333 ...
##  $ chl_a_SD         : num  0.0227 0.052 0.0438 0.0185 0.0362 ...
##  $ tur_range_Mean   : num  2.984 3.388 1.638 1.305 0.961 ...
##  $ tur_range_SD     : num  0.192 0.317 0.17 0.131 0.163 ...
##  $ do_Mean          : num  93.4 92.9 94.7 102.7 99 ...
##  $ do_SD            : num  0.358 0.234 0.129 0.928 0.433 ...

rink <- rink %>% separate(date, c("year", "month", "day"), sep = "/", remove = FALSE)
#rink$day <- sub("^0+", "", rink$day) 
#rink$month <- sub("^0+", "", rink$month) 

rink$number<-substr(rink$location, 2, 2)
rink$UR <- substr(rink$location, 1, 1)

rink<- rink %>% unite(Name, c(sample,year, month, day), sep = "-", remove = FALSE)

nutrients:

nuts <- read.csv("nuts_microMtotal.csv")

nuts <- nuts %>% separate(dates, c("year", "month", "day"), sep = "-", remove = FALSE) %>% unite(col = 'Name', c(siteID, year, month, day), sep ='-') 

nuts$number<-substr(nuts$area, 2, 2)
nuts$UR <- substr(nuts$area, 1, 1)
nuts$Position <- nuts$sitepos

nuts<- nuts %>% select(Name, replicate, landuse, dates, NO3, NO2, NH4,PO4, SiO2)

nuts_avg <- nuts %>% group_by(Name, landuse, dates) %>% summarise_at(.vars = c("NO3", "NO2", "NH4","PO4", "SiO2"), .funs = c(mean="mean")) %>% ungroup()

Put CTD and nutrient data into single dataframe:

env <- merge(nuts_avg, rink, by = "Name")
# get r1 r2 number and c n s into separate columns

env_clean <- env %>%  select(Name, landuse,location, Position, sample, number, UR, dates, year, month, day, NO3_mean, NO2_mean, NH4_mean, PO4_mean, SiO2_mean, water_temp_Mean, salinity_Mean, chl_a_Mean, tur_range_Mean, do_Mean) %>% mutate(NO2_NO3  = NO3_mean + NO2_mean)

str(env_clean)

## 'data.frame':    299 obs. of  22 variables:
##  $ Name           : chr  "R1C-2020-09-17" "R1C-2020-10-01" "R1C-2020-10-15" "R1C-2020-10-29" ...
##  $ landuse        : chr  "R" "R" "R" "R" ...
##  $ location       : chr  "R1" "R1" "R1" "R1" ...
##  $ Position       : chr  "C" "C" "C" "C" ...
##  $ sample         : chr  "R1C" "R1C" "R1C" "R1C" ...
##  $ number         : chr  "1" "1" "1" "1" ...
##  $ UR             : chr  "R" "R" "R" "R" ...
##  $ dates          : chr  "2020-09-17" "2020-10-01" "2020-10-15" "2020-10-29" ...
##  $ year           : chr  "2020" "2020" "2020" "2020" ...
##  $ month          : chr  "09" "10" "10" "10" ...
##  $ day            : chr  "17" "01" "15" "29" ...
##  $ NO3_mean       : num  0.1564 0 0 0.0325 0.1255 ...
##  $ NO2_mean       : num  0.035867 0.000482 0 0.001376 0.002405 ...
##  $ NH4_mean       : num  0.747 0.352 0.334 0 0 ...
##  $ PO4_mean       : num  0.01161 0.02267 0.02533 0.02439 0.00958 ...
##  $ SiO2_mean      : num  17.25 23.33 8.8 10.34 5.43 ...
##  $ water_temp_Mean: num  31.1 25 26.7 25.9 23.7 ...
##  $ salinity_Mean  : num  28.5 34.2 34.5 34.5 34.7 ...
##  $ chl_a_Mean     : num  0.643 0.399 0.25 0.245 0.292 ...
##  $ tur_range_Mean : num  2.984 7.775 6.118 0.958 1.4 ...
##  $ do_Mean        : num  93.4 88.7 90.7 92.2 96 ...
##  $ NO2_NO3        : num  0.192301 0.000482 0 0.033873 0.127934 ...

z-score data columns:

library(magrittr)
library(liver)

## 
## Attaching package: 'liver'

## The following object is masked from 'package:base':
## 
##     transform

env_clean$NO3_z <- transform(env_clean$NO3_mean, method = "zscore")
env_clean$NO2_z  <- transform(env_clean$NO2_mean, method = "zscore")
env_clean$NH4_z  <- transform(env_clean$NH4_mean, method = "zscore")
env_clean$PO4_z  <- transform(env_clean$PO4_mean, method = "zscore")
env_clean$SiO2_z  <- transform(env_clean$SiO2_mean, method = "zscore")
env_clean$water_temp_z  <- transform(env_clean$water_temp_Mean, method = "zscore")
env_clean$salinity_z  <- transform(env_clean$salinity_Mean, method = "zscore")
env_clean$chl_a_z <- transform(env_clean$chl_a_Mean, method = "zscore")
env_clean$tur_z  <- transform(env_clean$tur_range_Mean, method = "zscore")
env_clean$do_z  <- transform(env_clean$do_Mean, method = "zscore")
env_clean$NO2_NO3_z <- transform(env_clean$NO2_NO3, method = "zscore")

correlation plots

library(corrplot)

## corrplot 0.92 loaded

corrplot(cor(env_clean %>% select(water_temp_z, salinity_z, chl_a_z, tur_z, do_z, NO2_NO3_z, NH4_z, PO4_z, SiO2_z)), type = "upper", diag = FALSE, title = "All Sites")

by site type:

Urban

corrplot(cor(env_clean %>% filter(landuse == "U") %>%  select(water_temp_z, salinity_z, chl_a_z, tur_z, do_z, NO2_NO3_z, NH4_z, PO4_z, SiO2_z)), type = "upper", diag = FALSE, title = "Urban Sites")

Significance test and add significance results to the plot: (Figure 4)

testRes = cor.mtest((env_clean %>% filter(landuse == "U") %>%  select(water_temp_z, salinity_z, chl_a_z, tur_z, do_z, NO2_NO3_z, NH4_z, PO4_z, SiO2_z)), conf.level = 0.95)


M = cor(env_clean %>% filter(landuse == "U") %>%  select(water_temp_z, salinity_z, chl_a_z, tur_z, do_z, NO2_NO3_z, NH4_z, PO4_z, SiO2_z))

corrplot(M, p.mat = testRes$p, diag = FALSE, type = 'upper',
         sig.level = c( 0.01), pch.cex = 1.3,
         insig = 'label_sig', pch.col = 'black', tl.col = "black", title = "Urban Sites")

# pdf(file = "env_corr_plot_urban.pdf",   # The directory you want to save the file in
#     width = 4, # The width of the plot in inches
#     height = 4)
# 
# 
# corrplot(M, p.mat = testRes$p, diag = FALSE, type = 'upper',
#          sig.level = c( 0.01), pch.cex = 1.3,
#          insig = 'label_sig', pch.col = 'black', tl.col = "black", title = "Urban Sites")
# 
# dev.off()

Rural

corrplot(cor(env_clean %>% filter(landuse == "R") %>%  select(water_temp_z, salinity_z, chl_a_z, tur_z, do_z, NO2_NO3_z, NH4_z, PO4_z, SiO2_z)), type = "upper", diag = FALSE, tl.col = "black", title = "Rural Sites")

#addCoef.col = "black"

Significance test and add significance results to the plot: (Figure 4)

testRes = cor.mtest((env_clean %>% filter(landuse == "R") %>%  select(water_temp_z, salinity_z, chl_a_z, tur_z, do_z, NO2_NO3_z, NH4_z, PO4_z, SiO2_z)), conf.level = 0.95)


M = cor(env_clean %>% filter(landuse == "R") %>%  select(water_temp_z, salinity_z, chl_a_z, tur_z, do_z, NO2_NO3_z, NH4_z, PO4_z, SiO2_z))

corrplot(M, p.mat = testRes$p, diag = FALSE, type = 'upper',
         sig.level = c( 0.01), pch.cex = 1.3,
         insig = 'label_sig', pch.col = 'black', tl.col = "black", title = "Rural Sites")

# pdf(file = "env_corr_plot_rural.pdf",   # The directory you want to save the file in
#     width = 4, # The width of the plot in inches
#     height = 4)
# 
# 
# corrplot(M, p.mat = testRes$p, diag = FALSE, type = 'upper',
#          sig.level = c( 0.01), pch.cex = 1.3,
#          insig = 'label_sig', pch.col = 'black', tl.col = "black", title = "Rural Sites")
# 
# dev.off()

Physicochemical timeseries plots

prep data: melt raw env data, plot by site by time

env_timeline <- env_clean %>% select(Name, landuse, location, Position, sample, number, UR, dates, NO3_mean, NO2_mean, NH4_mean,PO4_mean,SiO2_mean,water_temp_Mean, salinity_Mean,chl_a_Mean,tur_range_Mean, do_Mean) %>%
  mutate(NO2_NO3 = NO3_mean + NO2_mean) %>% 
  pivot_longer(!c(Name, landuse, location, Position, sample, number, UR, dates), names_to = "var", values_to = "value")

library(lubridate)

env_timeline$dates <- ymd(env_timeline$dates)

env_timeline$Position <- factor(env_timeline$Position, levels = c("S", "C", "N"))

physical timeseries

env_timeline %>% filter(var %in% c("water_temp_Mean", "salinity_Mean", "tur_range_Mean", "do_Mean")) %>%  ggplot(aes( x = dates, y =  value, color = Position)) +
  facet_grid(factor(var, levels = c("water_temp_Mean", "salinity_Mean", "tur_range_Mean", "do_Mean"))~factor(location, levels= c("U1", "U2", "R1", "R2")), scales = "free_y") + geom_point() + geom_path(size =.1)  + xlab("") + ylab("") + theme_bw() + scale_color_manual(values = c("#005694","#D82354", "#FFC317")) + theme(legend.position = "none") + theme(plot.margin= margin(t = 0, r = 0, b = 0, l = 0, unit = "pt")) + theme(axis.title=element_text(size=8)) + scale_x_date(date_breaks = "1 month", labels=date_format("%b")) + theme(axis.text.x = element_text(angle= 90, vjust = 0.5, hjust=1)) + theme(panel.grid.minor = element_blank())

ggsave(“phys_metadata.pdf”, width = 10, height = 6)

chemical timeseries

env_timeline %>% filter(var %in% c("NO2_NO3", "NH4_mean",    "PO4_mean", "SiO2_mean" )) %>%  ggplot(aes( x = dates, y =  value, color = Position)) +
  facet_grid(factor(var, levels = c("NO2_NO3", "NH4_mean",   "PO4_mean", "SiO2_mean" ))~factor(location, levels= c("U1", "U2", "R1", "R2")), scales = "free_y") + geom_point() + geom_path(size =.1)  + xlab("") + ylab("") + theme_bw() + scale_color_manual(values = c("#005694","#D82354", "#FFC317")) + theme(legend.position = "none") + theme(plot.margin= margin(t = 0, r = 0, b = 0, l = 0, unit = "pt")) + theme(axis.title=element_text(size=8)) + scale_x_date(date_breaks = "1 month", labels=date_format("%b")) + theme(axis.text.x = element_text(angle= 90, vjust = 0.5, hjust=1)) + theme(panel.grid.minor = element_blank())

ggsave(“chem_metadata.pdf”, width = 10, height = 6)

Chlorophyll a

env_timeline %>% filter(var == "chl_a_Mean") %>%  ggplot(aes( x = dates, y =  value, color = Position)) +
  facet_grid(factor(var, levels = c("chl_a_Mean" ))~factor(location, levels= c("U1", "U2", "R1", "R2")), scales = "free_y") + geom_point() + geom_path(size =.1)  + xlab("") + ylab("") + theme_bw() + scale_color_manual(values = c("#005694","#D82354", "#FFC317")) + theme(legend.position = "none") + theme(plot.margin= margin(t = 0, r = 0, b = 0, l = 0, unit = "pt")) + theme(axis.title=element_text(size=8)) + scale_x_date(date_breaks = "1 month", labels=date_format("%b")) + theme(axis.text.x = element_text(angle= 90, vjust = 0.5, hjust=1)) + theme(panel.grid.minor = element_blank())

RDA

Prep data for Redundancy Analysis

samplemap<- read.csv("meta.csv")
samplemap$date <- dmy(samplemap$date)

## Warning: 5 failed to parse.

samplemap$Month <- as.factor(as.numeric(samplemap$Month))

## Warning in is.factor(x): NAs introduced by coercion

samplemap <- samplemap %>% mutate(Day=day(date)) %>% unite(Name, c(LandUse, Number, Position, Month, Day), sep = "_", remove = FALSE)

str(samplemap)

## 'data.frame':    297 obs. of  10 variables:
##  $ SampleID: chr  "R1C-2020-09-17-1" "R1C-2020-10-01-49" "R1C-2020-10-15-51" "R1C-2020-10-29-66" ...
##  $ SS      : chr  "R1C" "R1C" "R1C" "R1C" ...
##  $ date    : Date, format: "2020-09-17" "2020-10-01" ...
##  $ Name    : chr  "R_1_C_9_17" "R_1_C_10_1" "R_1_C_10_15" "R_1_C_10_29" ...
##  $ LandUse : chr  "R" "R" "R" "R" ...
##  $ Number  : chr  "1" "1" "1" "1" ...
##  $ Position: chr  "C" "C" "C" "C" ...
##  $ Month   : Factor w/ 12 levels "1","2","3","4",..: 9 10 10 10 11 12 12 9 10 10 ...
##  $ Area    : chr  "R1" "R1" "R1" "R1" ...
##  $ Day     : int  17 1 15 29 12 10 24 17 1 29 ...

samplemap$datechr <- as.character(samplemap$date)
samplemap <- samplemap %>% unite(Name, c(SS, datechr), sep = "-")

metaE <- left_join(samplemap, env_clean)

## Joining with `by = join_by(Name, Position)`

row.names(metaE) <- metaE$SampleID

METAenv <- sample_data(metaE)

Rural

(Figure S5 A)

rural <- subset_samples(ps_nochloro, LandUse %in% c("R"))


OTU4clr<- data.frame(t(data.frame(otu_table(rural))))

row.names(OTU4clr) <- str_remove(row.names(OTU4clr), "X")
row.names(OTU4clr) <- gsub("\\.", "-", row.names(OTU4clr))

OTUs.clr <- codaSeq.clr(OTU4clr + 0.5, samples.by.row=TRUE)
OTU2 <- otu_table(as.matrix(OTUs.clr), taxa_are_rows = FALSE)


psCLR <- phyloseq(OTU2,TAX,METAenv)


cca_litdir <- ordinate(psCLR, formula = ~ NO2_NO3_z+ NH4_z +PO4_z +SiO2_z +water_temp_z +salinity_z + tur_z +do_z , "RDA")


p0 <- plot_ordination(psCLR, cca_litdir, type = "samples") + theme_bw()+ geom_hline(yintercept = 0, linetype = "dashed", color = "lightgrey") +  geom_vline(xintercept = 0, linetype = "dashed", color = "lightgrey") + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank()) + geom_point(aes(fill=Month, shape = Number), size =3)  + scale_shape_manual(values= c(21,24, 22)) + scale_fill_manual(values=c(blue[1:3], purple[2:4], green[1:3], yellow[1:3]))+ theme(text = element_text(size = 14)) +  theme(legend.position="none") +ggtitle("Rural Sites") 


# Now add the environmental variables as arrows
arrowmat = vegan::scores(cca_litdir, display = "bp")
arrowdf <- data.frame(labels = rownames(arrowmat), arrowmat)
arrow_map = aes(xend = 3* RDA1, yend = 3* RDA2, x = 0, y = 0, shape = NULL, color = NULL, 
    label = labels)
label_map = aes(x = 3.5 * RDA1, y = 3.5 * RDA2, shape = NULL, color = NULL, 
    label = labels)
arrowhead = arrow(length = unit(0.02, "npc"))
surfRDA_plot = p0 + geom_segment(arrow_map, size = 0.5, data = arrowdf, color = "darkgray", 
    arrow = arrowhead) + geom_text(label_map, size = 4, data = arrowdf)

## Warning in geom_segment(arrow_map, size = 0.5, data = arrowdf, color =
## "darkgray", : Ignoring unknown aesthetics: label

surfRDA_plot

ggsave(“RuralSites_RDA.pdf”, width= 5, height = 4)

with colorblind colors

p0 <- plot_ordination(psCLR, cca_litdir, type = "samples") + theme_bw()+ geom_hline(yintercept = 0, linetype = "dashed", color = "lightgrey") +  geom_vline(xintercept = 0, linetype = "dashed", color = "lightgrey") + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank()) + geom_point(aes(fill=Month, shape = Number), size =3)  + scale_shape_manual(values= c(21,24, 22)) + scale_fill_manual(values=cb)+ theme(text = element_text(size = 14)) +  theme(legend.position="none") +ggtitle("Rural Sites") 

# Now add the environmental variables as arrows
arrowmat = vegan::scores(cca_litdir, display = "bp")
arrowdf <- data.frame(labels = rownames(arrowmat), arrowmat)
arrow_map = aes(xend = 3* RDA1, yend = 3* RDA2, x = 0, y = 0, shape = NULL, color = NULL, 
    label = labels)
label_map = aes(x = 3.5 * RDA1, y = 3.5 * RDA2, shape = NULL, color = NULL, 
    label = labels)
arrowhead = arrow(length = unit(0.02, "npc"))
surfRDA_plot = p0 + geom_segment(arrow_map, size = 0.5, data = arrowdf, color = "darkgray", 
    arrow = arrowhead) + geom_text(label_map, size = 4, data = arrowdf)

## Warning in geom_segment(arrow_map, size = 0.5, data = arrowdf, color =
## "darkgray", : Ignoring unknown aesthetics: label

surfRDA_plot

ANOVA of RDA results:

metaR <- metaE[row.names(metaE) %in% row.names(OTU4clr),]

ruralRDA <- rda(OTU4clr ~ NO2_NO3_z+ NH4_z +PO4_z +SiO2_z +water_temp_z +salinity_z + tur_z +do_z, data = metaR)

anova(ruralRDA , perm = 999)

## Permutation test for rda under reduced model
## Permutation: free
## Number of permutations: 999
## 
## Model: rda(formula = OTU4clr ~ NO2_NO3_z + NH4_z + PO4_z + SiO2_z + water_temp_z + salinity_z + tur_z + do_z, data = metaR)
##           Df  Variance      F Pr(>F)    
## Model      8  39616286 4.0415  0.001 ***
## Residual 137 167864353                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Overall results are significant.

ANOVA of RDA results by term: (Table S2 A)

anova(ruralRDA, by="term", perm=999)

## Permutation test for rda under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## Model: rda(formula = OTU4clr ~ NO2_NO3_z + NH4_z + PO4_z + SiO2_z + water_temp_z + salinity_z + tur_z + do_z, data = metaR)
##               Df  Variance       F Pr(>F)    
## NO2_NO3_z      1    779991  0.6366  0.576    
## NH4_z          1    213139  0.1740  0.939    
## PO4_z          1   8437091  6.8858  0.004 ** 
## SiO2_z         1   2642704  2.1568  0.097 .  
## water_temp_z   1   5169601  4.2191  0.016 *  
## salinity_z     1   3473160  2.8346  0.049 *  
## tur_z          1   2452242  2.0014  0.114    
## do_z           1  16448357 13.4241  0.001 ***
## Residual     137 167864353                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Variance partitioning for significant terms: (Table S2 A)

mod<-varpart(OTU4clr, metaR["water_temp_z"], metaR["salinity_z"], metaR["PO4_z"], metaR["do_z"])
mod

## 
## Partition of variance in RDA 
## 
## Call: varpart(Y = OTU4clr, X = metaR["water_temp_z"],
## metaR["salinity_z"], metaR["PO4_z"], metaR["do_z"])
## 
## Explanatory tables:
## X1:  metaR["water_temp_z"]
## X2:  metaR["salinity_z"]
## X3:  metaR["PO4_z"]
## X4:  metaR["do_z"] 
## 
## No. of explanatory tables: 4 
## Total variation (SS): 3.0085e+10 
##             Variance: 207480639 
## No. of observations: 146 
## 
## Partition table:
##                             Df R.square Adj.R.square Testable
## [aeghklno] = X1              1  0.02879      0.02204     TRUE
## [befiklmo] = X2              1  0.01290      0.00604     TRUE
## [cfgjlmno] = X3              1  0.03803      0.03135     TRUE
## [dhijkmno] = X4              1  0.08045      0.07406     TRUE
## [abefghiklmno] = X1+X2       2  0.03900      0.02556     TRUE
## [acefghjklmno] = X1+X3       2  0.06594      0.05287     TRUE
## [adeghijklmno] = X1+X4       2  0.10869      0.09623     TRUE
## [bcefgijklmno] = X2+X3       2  0.05157      0.03831     TRUE
## [bdefhijklmno] = X2+X4       2  0.09529      0.08264     TRUE
## [cdfghijklmno] = X3+X4       2  0.11510      0.10272     TRUE
## [abcefghijklmno] = X1+X2+X3  3  0.07503      0.05549     TRUE
## [abdefghijklmno] = X1+X2+X4  3  0.11926      0.10065     TRUE
## [acdefghijklmno] = X1+X3+X4  3  0.14542      0.12736     TRUE
## [bcdefghijklmno] = X2+X3+X4  3  0.13059      0.11222     TRUE
## [abcdefghijklmno] = All      4  0.15484      0.13087     TRUE
## Individual fractions                                         
## [a] = X1 | X2+X3+X4          1               0.01864     TRUE
## [b] = X2 | X1+X3+X4          1               0.00350     TRUE
## [c] = X3 | X1+X2+X4          1               0.03022     TRUE
## [d] = X4 | X1+X2+X3          1               0.07538     TRUE
## [e]                          0               0.00600    FALSE
## [f]                          0               0.00092    FALSE
## [g]                          0              -0.00063    FALSE
## [h]                          0              -0.00146    FALSE
## [i]                          0              -0.00089    FALSE
## [j]                          0              -0.00029    FALSE
## [k]                          0              -0.00166    FALSE
## [l]                          0              -0.00184    FALSE
## [m]                          0              -0.00001    FALSE
## [n]                          0               0.00297    FALSE
## [o]                          0               0.00003    FALSE
## [p] = Residuals              0               0.86913    FALSE
## Controlling 2 tables X                                       
## [ae] = X1 | X3+X4            1               0.02464     TRUE
## [ag] = X1 | X2+X4            1               0.01801     TRUE
## [ah] = X1 | X2+X3            1               0.01718     TRUE
## [be] = X2 | X3+X4            1               0.00950     TRUE
## [bf] = X2 | X1+X4            1               0.00442     TRUE
## [bi] = X2 | X1+X3            1               0.00261     TRUE
## [cf] = X3 | X1+X4            1               0.03114     TRUE
## [cg] = X3 | X2+X4            1               0.02959     TRUE
## [cj] = X3 | X1+X2            1               0.02993     TRUE
## [dh] = X4 | X2+X3            1               0.07392     TRUE
## [di] = X4 | X1+X3            1               0.07449     TRUE
## [dj] = X4 | X1+X2            1               0.07509     TRUE
## Controlling 1 table X                                        
## [aghn] = X1 | X2             1               0.01952     TRUE
## [aehk] = X1 | X3             1               0.02152     TRUE
## [aegl] = X1 | X4             1               0.02217     TRUE
## [bfim] = X2 | X1             1               0.00352     TRUE
## [beik] = X2 | X3             1               0.00695     TRUE
## [befl] = X2 | X4             1               0.00858     TRUE
## [cfjm] = X3 | X1             1               0.03083     TRUE
## [cgjn] = X3 | X2             1               0.03226     TRUE
## [cfgl] = X3 | X4             1               0.02866     TRUE
## [dijm] = X4 | X1             1               0.07419     TRUE
## [dhjn] = X4 | X2             1               0.07659     TRUE
## [dhik] = X4 | X3             1               0.07137     TRUE
## ---
## Use function 'rda' to test significance of fractions of interest

Urban

(Figure S5 B)

urban <- subset_samples(ps_nochloro, LandUse %in% c("U"))

OTU4clr<- data.frame(t(data.frame(otu_table(urban))))

row.names(OTU4clr) <- str_remove(row.names(OTU4clr), "X")
row.names(OTU4clr) <- gsub("\\.", "-", row.names(OTU4clr))

OTUs.clr <- codaSeq.clr(OTU4clr + 0.5, samples.by.row=TRUE)
OTU2 <- otu_table(as.matrix(OTUs.clr), taxa_are_rows = FALSE)

psCLR <- phyloseq(OTU2,TAX,METAenv)

cca_litdir <- ordinate(psCLR, formula = ~ NO2_NO3_z+ NH4_z +PO4_z +SiO2_z +water_temp_z +salinity_z + tur_z +do_z , "RDA")


p0 <- plot_ordination(psCLR, cca_litdir, type = "samples") + theme_bw()+ geom_hline(yintercept = 0, linetype = "dashed", color = "lightgrey") +  geom_vline(xintercept = 0, linetype = "dashed", color = "lightgrey") + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank()) + geom_point(aes(fill=Month, shape = Number), size =3)  + scale_shape_manual(values= c(21,24, 22)) + scale_fill_manual(values=c(blue[1:3], purple[2:4], green[1:3], yellow[1:3]))+ theme(text = element_text(size = 14)) +  theme(legend.position="none") +ggtitle("Urban Sites") 
# Now add the environmental variables as arrows
arrowmat = vegan::scores(cca_litdir, display = "bp")
arrowdf <- data.frame(labels = rownames(arrowmat), arrowmat)
arrow_map = aes(xend = 3* RDA1, yend = 3* RDA2, x = 0, y = 0, shape = NULL, color = NULL, 
    label = labels)
label_map = aes(x = 3.5 * RDA1, y = 3.5 * RDA2, shape = NULL, color = NULL, 
    label = labels)
arrowhead = arrow(length = unit(0.02, "npc"))
surfRDA_plot = p0 + geom_segment(arrow_map, size = 0.5, data = arrowdf, color = "gray", 
    arrow = arrowhead) + geom_text(label_map, size = 4, data = arrowdf)

## Warning in geom_segment(arrow_map, size = 0.5, data = arrowdf, color = "gray",
## : Ignoring unknown aesthetics: label

surfRDA_plot

ggsave(“UrbanSites_RDA.pdf”, width= 5, height = 4)

with colorblind colors

p0 <- plot_ordination(psCLR, cca_litdir, type = "samples") + theme_bw()+ geom_hline(yintercept = 0, linetype = "dashed", color = "lightgrey") +  geom_vline(xintercept = 0, linetype = "dashed", color = "lightgrey") + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank()) + geom_point(aes(fill=Month, shape = Number), size =3)  + scale_shape_manual(values= c(21,24, 22)) + scale_fill_manual(values=cb)+ theme(text = element_text(size = 14)) +  theme(legend.position="none") +ggtitle("Urban Sites") 
# Now add the environmental variables as arrows
arrowmat = vegan::scores(cca_litdir, display = "bp")
arrowdf <- data.frame(labels = rownames(arrowmat), arrowmat)
arrow_map = aes(xend = 3* RDA1, yend = 3* RDA2, x = 0, y = 0, shape = NULL, color = NULL, 
    label = labels)
label_map = aes(x = 3.5 * RDA1, y = 3.5 * RDA2, shape = NULL, color = NULL, 
    label = labels)
arrowhead = arrow(length = unit(0.02, "npc"))
surfRDA_plot = p0 + geom_segment(arrow_map, size = 0.5, data = arrowdf, color = "gray", 
    arrow = arrowhead) + geom_text(label_map, size = 4, data = arrowdf)

## Warning in geom_segment(arrow_map, size = 0.5, data = arrowdf, color = "gray",
## : Ignoring unknown aesthetics: label

surfRDA_plot

ANOVA of RDA results:

metaU <- metaE[row.names(metaE) %in% row.names(OTU4clr),]

urbanRDA <- rda(OTU4clr ~ NO2_NO3_z+ NH4_z +PO4_z +SiO2_z +water_temp_z +salinity_z + tur_z +do_z, data = metaU)

anova(urbanRDA , perm = 999)

## Permutation test for rda under reduced model
## Permutation: free
## Number of permutations: 999
## 
## Model: rda(formula = OTU4clr ~ NO2_NO3_z + NH4_z + PO4_z + SiO2_z + water_temp_z + salinity_z + tur_z + do_z, data = metaU)
##           Df  Variance      F Pr(>F)   
## Model      8  17142721 2.2803  0.003 **
## Residual 136 127798928                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Overall results are significant.

ANOVA of RDA results by term: (Table S2 B)

anova(urbanRDA, by="term", perm=999)

## Permutation test for rda under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## Model: rda(formula = OTU4clr ~ NO2_NO3_z + NH4_z + PO4_z + SiO2_z + water_temp_z + salinity_z + tur_z + do_z, data = metaU)
##               Df  Variance      F Pr(>F)    
## NO2_NO3_z      1   5201125 5.5349  0.001 ***
## NH4_z          1    727732 0.7744  0.427    
## PO4_z          1   1390986 1.4802  0.156    
## SiO2_z         1   1000320 1.0645  0.338    
## water_temp_z   1   3533677 3.7604  0.003 ** 
## salinity_z     1   1742003 1.8538  0.103    
## tur_z          1   2150375 2.2884  0.055 .  
## do_z           1   1396502 1.4861  0.160    
## Residual     136 127798928                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Variance partitioning for significant terms: (Table S2 B)

modU<-varpart(OTU4clr, metaU["water_temp_z"], metaU["NO2_NO3_z"])
modU

## 
## Partition of variance in RDA 
## 
## Call: varpart(Y = OTU4clr, X = metaU["water_temp_z"],
## metaU["NO2_NO3_z"])
## 
## Explanatory tables:
## X1:  metaU["water_temp_z"]
## X2:  metaU["NO2_NO3_z"] 
## 
## No. of explanatory tables: 2 
## Total variation (SS): 2.0872e+10 
##             Variance: 144941649 
## No. of observations: 145 
## 
## Partition table:
##                      Df R.squared Adj.R.squared Testable
## [a+c] = X1            1   0.02845       0.02165     TRUE
## [b+c] = X2            1   0.03588       0.02914     TRUE
## [a+b+c] = X1+X2       2   0.06104       0.04781     TRUE
## Individual fractions                                    
## [a] = X1|X2           1                 0.01867     TRUE
## [b] = X2|X1           1                 0.02616     TRUE
## [c]                   0                 0.00298    FALSE
## [d] = Residuals                         0.95219    FALSE
## ---
## Use function 'rda' to test significance of fractions of interest

Session info

sessioninfo::session_info()

## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.3.1 (2023-06-16)
##  os       macOS Big Sur ... 10.16
##  system   x86_64, darwin13.4.0
##  ui       unknown
##  language (EN)
##  collate  en_US.UTF-8
##  ctype    en_US.UTF-8
##  tz       America/New_York
##  date     2023-12-05
##  pandoc   3.1.3 @ /Users/brisbin/miniconda3/envs/R/bin/ (via rmarkdown)
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package              * version    date (UTC) lib source
##  abind                  1.4-5      2016-07-21 [1] CRAN (R 4.3.1)
##  ade4                   1.7-22     2023-02-06 [1] CRAN (R 4.3.1)
##  ALDEx2               * 1.32.0     2023-04-25 [1] Bioconductor
##  ape                  * 5.7-1      2023-03-13 [1] CRAN (R 4.3.1)
##  backports              1.4.1      2021-12-13 [1] CRAN (R 4.3.0)
##  base64enc              0.1-3      2015-07-28 [1] CRAN (R 4.3.0)
##  Biobase                2.60.0     2023-04-25 [1] Bioconductor
##  BiocGenerics           0.46.0     2023-04-25 [1] Bioconductor
##  BiocParallel           1.34.2     2023-05-22 [1] Bioconductor
##  biomformat           * 1.28.0     2023-04-25 [1] Bioconductor
##  Biostrings             2.68.1     2023-05-16 [1] Bioconductor
##  bitops                 1.0-7      2021-04-24 [1] CRAN (R 4.3.1)
##  boot                   1.3-28.1   2022-11-22 [1] CRAN (R 4.3.0)
##  breakaway            * 4.8.4      2022-11-22 [1] CRAN (R 4.3.1)
##  bslib                  0.5.1      2023-08-11 [1] CRAN (R 4.3.1)
##  cachem                 1.0.8      2023-05-01 [1] CRAN (R 4.3.0)
##  car                  * 3.1-2      2023-03-30 [1] CRAN (R 4.3.1)
##  carData              * 3.0-5      2022-01-06 [1] CRAN (R 4.3.1)
##  checkmate              2.2.0      2023-04-27 [1] CRAN (R 4.3.1)
##  cli                    3.6.1      2023-03-23 [1] CRAN (R 4.3.0)
##  cluster                2.1.4      2022-08-22 [1] CRAN (R 4.3.0)
##  CoDaSeq              * 0.99.7     2023-09-25 [1] Github (ggloor/CoDaSeq@72f7fd2)
##  codetools              0.2-19     2023-02-01 [1] CRAN (R 4.3.0)
##  colorspace             2.1-0      2023-01-23 [1] CRAN (R 4.3.0)
##  corrplot             * 0.92       2021-11-18 [1] CRAN (R 4.3.1)
##  crayon                 1.5.2      2022-09-29 [1] CRAN (R 4.3.0)
##  data.table             1.14.8     2023-02-17 [1] CRAN (R 4.3.0)
##  DelayedArray           0.26.7     2023-07-28 [1] Bioconductor
##  digest                 0.6.33     2023-07-07 [1] CRAN (R 4.3.0)
##  dplyr                * 1.1.3      2023-09-03 [1] CRAN (R 4.3.1)
##  DT                     0.30       2023-10-05 [1] CRAN (R 4.3.1)
##  evaluate               0.22       2023-09-29 [1] CRAN (R 4.3.1)
##  fansi                  1.0.5      2023-10-08 [1] CRAN (R 4.3.1)
##  farver                 2.1.1      2022-07-06 [1] CRAN (R 4.3.0)
##  fastmap                1.1.1      2023-02-24 [1] CRAN (R 4.3.0)
##  forcats              * 1.0.0      2023-01-29 [1] CRAN (R 4.3.0)
##  foreach                1.5.2      2022-02-02 [1] CRAN (R 4.3.0)
##  foreign                0.8-85     2023-09-09 [1] CRAN (R 4.3.1)
##  Formula                1.2-5      2023-02-24 [1] CRAN (R 4.3.1)
##  generics               0.1.3      2022-07-05 [1] CRAN (R 4.3.0)
##  GenomeInfoDb           1.36.4     2023-10-02 [1] Bioconductor
##  GenomeInfoDbData       1.2.10     2023-09-25 [1] Bioconductor
##  GenomicRanges          1.52.1     2023-10-08 [1] Bioconductor
##  ggplot2              * 3.4.3      2023-08-14 [1] CRAN (R 4.3.1)
##  glmnet                 4.1-8      2023-08-22 [1] CRAN (R 4.3.1)
##  glue                   1.6.2      2022-02-24 [1] CRAN (R 4.3.0)
##  gridBase               0.4-7      2014-02-24 [1] CRAN (R 4.3.1)
##  gridExtra              2.3        2017-09-09 [1] CRAN (R 4.3.1)
##  gtable                 0.3.4      2023-08-21 [1] CRAN (R 4.3.1)
##  Hmisc                  5.1-1      2023-09-12 [1] CRAN (R 4.3.1)
##  hms                    1.1.3      2023-03-21 [1] CRAN (R 4.3.0)
##  htmlTable              2.4.1      2022-07-07 [1] CRAN (R 4.3.1)
##  htmltools              0.5.6.1    2023-10-06 [1] CRAN (R 4.3.1)
##  htmlwidgets            1.6.2      2023-03-17 [1] CRAN (R 4.3.0)
##  huge                   1.3.5      2021-06-30 [1] CRAN (R 4.3.1)
##  igraph               * 1.5.1      2023-08-10 [1] CRAN (R 4.3.1)
##  IRanges                2.34.1     2023-06-22 [1] Bioconductor
##  iterators              1.0.14     2022-02-05 [1] CRAN (R 4.3.0)
##  jquerylib              0.1.4      2021-04-26 [1] CRAN (R 4.3.0)
##  jsonlite               1.8.7      2023-06-29 [1] CRAN (R 4.3.0)
##  knitr                  1.44       2023-09-11 [1] CRAN (R 4.3.1)
##  labeling               0.4.3      2023-08-29 [1] CRAN (R 4.3.1)
##  lattice              * 0.21-9     2023-10-01 [1] CRAN (R 4.3.1)
##  lifecycle              1.0.3      2022-10-07 [1] CRAN (R 4.3.0)
##  liver                * 1.14       2023-04-11 [1] CRAN (R 4.3.1)
##  lme4                   1.1-34     2023-07-04 [1] CRAN (R 4.3.1)
##  lubridate            * 1.9.3      2023-09-27 [1] CRAN (R 4.3.1)
##  magrittr             * 2.0.3      2022-03-30 [1] CRAN (R 4.3.0)
##  MASS                 * 7.3-60     2023-05-04 [1] CRAN (R 4.3.0)
##  Matrix               * 1.6-1.1    2023-09-18 [1] CRAN (R 4.3.1)
##  MatrixGenerics         1.12.3     2023-07-30 [1] Bioconductor
##  matrixStats            1.0.0      2023-06-02 [1] CRAN (R 4.3.1)
##  mgcv                   1.9-0      2023-07-11 [1] CRAN (R 4.3.0)
##  minqa                  1.2.6      2023-09-11 [1] CRAN (R 4.3.1)
##  multtest               2.56.0     2023-04-25 [1] Bioconductor
##  munsell                0.5.0      2018-06-12 [1] CRAN (R 4.3.0)
##  NADA                 * 1.6-1.1    2020-03-22 [1] CRAN (R 4.3.1)
##  nlme                   3.1-163    2023-08-09 [1] CRAN (R 4.3.1)
##  nloptr                 2.0.3      2022-05-26 [1] CRAN (R 4.3.1)
##  nnet                   7.3-19     2023-05-03 [1] CRAN (R 4.3.0)
##  patchwork            * 1.1.3.9000 2023-09-25 [1] Github (thomasp85/patchwork@51a6eff)
##  permute              * 0.9-7      2022-01-27 [1] CRAN (R 4.3.1)
##  phyloseq             * 1.44.0     2023-04-25 [1] Bioconductor
##  pillar                 1.9.0      2023-03-22 [1] CRAN (R 4.3.0)
##  pkgconfig              2.0.3      2019-09-22 [1] CRAN (R 4.3.0)
##  plyr                   1.8.9      2023-10-02 [1] CRAN (R 4.3.1)
##  pulsar                 0.3.11     2023-09-24 [1] CRAN (R 4.3.1)
##  purrr                * 1.0.2      2023-08-10 [1] CRAN (R 4.3.1)
##  qiime2R              * 0.99.6     2023-09-25 [1] Github (jbisanz/qiime2R@2a3cee1)
##  R6                     2.5.1      2021-08-19 [1] CRAN (R 4.3.0)
##  Rcpp                   1.0.11     2023-07-06 [1] CRAN (R 4.3.0)
##  RcppZiggurat           0.1.6      2020-10-20 [1] CRAN (R 4.3.1)
##  RCurl                  1.98-1.12  2023-03-27 [1] CRAN (R 4.3.1)
##  readr                * 2.1.4      2023-02-10 [1] CRAN (R 4.3.0)
##  reshape2             * 1.4.4      2020-04-09 [1] CRAN (R 4.3.0)
##  Rfast                  2.0.8      2023-07-03 [1] CRAN (R 4.3.1)
##  rhdf5                  2.44.0     2023-04-25 [1] Bioconductor
##  rhdf5filters           1.12.1     2023-04-30 [1] Bioconductor
##  Rhdf5lib               1.22.1     2023-09-10 [1] Bioconductor
##  rlang                  1.1.1      2023-04-28 [1] CRAN (R 4.3.0)
##  rmarkdown              2.25       2023-09-18 [1] CRAN (R 4.3.1)
##  rpart                  4.1.21     2023-10-09 [1] CRAN (R 4.3.1)
##  rstudioapi             0.15.0     2023-07-07 [1] CRAN (R 4.3.0)
##  S4Arrays               1.0.6      2023-08-30 [1] Bioconductor
##  S4Vectors              0.38.2     2023-09-22 [1] Bioconductor
##  sass                   0.4.7      2023-07-15 [1] CRAN (R 4.3.1)
##  scales               * 1.2.1      2022-08-20 [1] CRAN (R 4.3.0)
##  sessioninfo            1.2.2      2021-12-06 [1] CRAN (R 4.3.1)
##  shape                  1.4.6      2021-05-19 [1] CRAN (R 4.3.0)
##  SpiecEasi            * 1.1.2      2023-09-25 [1] Github (zdk123/SpiecEasi@d6bc127)
##  stringi                1.7.12     2023-01-11 [1] CRAN (R 4.3.1)
##  stringr              * 1.5.0      2022-12-02 [1] CRAN (R 4.3.0)
##  SummarizedExperiment   1.30.2     2023-06-06 [1] Bioconductor
##  survival             * 3.5-7      2023-08-14 [1] CRAN (R 4.3.1)
##  tibble               * 3.2.1      2023-03-20 [1] CRAN (R 4.3.0)
##  tidyr                * 1.3.0      2023-01-24 [1] CRAN (R 4.3.0)
##  tidyselect             1.2.0      2022-10-10 [1] CRAN (R 4.3.0)
##  tidyverse            * 2.0.0      2023-02-22 [1] CRAN (R 4.3.0)
##  timechange             0.2.0      2023-01-11 [1] CRAN (R 4.3.0)
##  truncnorm            * 1.0-9      2023-03-20 [1] CRAN (R 4.3.1)
##  tzdb                   0.4.0      2023-05-12 [1] CRAN (R 4.3.0)
##  utf8                   1.2.3      2023-01-31 [1] CRAN (R 4.3.0)
##  vctrs                  0.6.3      2023-06-14 [1] CRAN (R 4.3.0)
##  vegan                * 2.6-4      2022-10-11 [1] CRAN (R 4.3.1)
##  VGAM                   1.1-9      2023-09-19 [1] CRAN (R 4.3.1)
##  viridis                0.6.4      2023-07-22 [1] CRAN (R 4.3.1)
##  viridisLite            0.4.2      2023-05-02 [1] CRAN (R 4.3.0)
##  withr                  2.5.1      2023-09-26 [1] CRAN (R 4.3.1)
##  xfun                   0.40       2023-08-09 [1] CRAN (R 4.3.1)
##  XVector                0.40.0     2023-04-25 [1] Bioconductor
##  yaml                   2.3.7      2023-01-23 [1] CRAN (R 4.3.0)
##  zCompositions        * 1.4.1      2023-08-23 [1] CRAN (R 4.3.1)
##  zlibbioc               1.46.0     2023-04-25 [1] Bioconductor
## 
##  [1] /Users/brisbin/miniconda3/envs/R/lib/R/library
## 
## ──────────────────────────────────────────────────────────────────────────────

Urban Oki

Maggi Brisbin

2023-11-17

Getting started

Packages, colors, functions

Sample Metadata

Sequence processing stats

Numbers of sequencing reads

Plot for sequencing stats

Load Data

Metadata

ASVs

Taxonomy (Silva)

Make complete phyloseq object:

Plot & filter Chloroplast 16S rRNA sequences

Rarefaction Curves

Alpha diversity

Richness (number of observed unique ASVs):

Shannon Index

Beta Diversity

PCoA all together

PERMANOVA

PCoA for Urban and Rural samples separately

Rural PCoA

with colorblind pallette

Rural PERMANOVAs

Urban PCoA

with colorblind colors

Urban PERMANOVAs

Rel Abundance Bar Plots

Bubble Plots

All orders

colorblind versions

filtered rel abund bar plots (Orange, red, orange and red)

Orange orders

Red Orders

Red+Orange Orders

Network Analysis

Urban samples

Rural samples

Rural Network

Urban Network

Bubble plots for individual orders

Vibrionales

Campylobacterales

Betaproteobacteriales

Bacteroidales

Clostridiales

Pseudomonadales

Physicochemical data

correlation plots

Urban

Rural

Physicochemical timeseries plots

physical timeseries

chemical timeseries

Chlorophyll a

RDA

Rural

with colorblind colors

Urban

with colorblind colors

Session info