This workflow is for using Sourcetracker (v1) with phyloseq formatted objects. Download the sourcetracker script before beginning.

The otu and tax tables have been saved as .csv files.

For more on sourcetracker (see Knights et al. 2011)

Set Up

library(ape)
library(plyr)
library(dplyr)
library(ggplot2)
library(gplots)
library(lme4)
library(tidyr)
library(vegan)
library(scales)
library(grid)
library(reshape2)

library(gridExtra)

m.g.colors <- c( "#999999", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7", "#CBD588", "#5F7FC7", "orange","#DA5724", "#508578", "#CD9BCD",
   "#AD6F3B", "#673770","#D14285", "#652926", "#C84248", 
  "#8569D5", "#5E738F","#D1A33D", "#8A7C64", "#599861" )

more.colors <- c("indianred1", "steelblue3",  "skyblue1", "mediumorchid","royalblue4", "olivedrab3",
   "pink", "#FFED6F", "mediumorchid3", "ivory2", "tan1", "aquamarine3", "#C0C0C0",
    "mediumvioletred", "#999933", "#666699", "#CC9933", "#006666", "#3399FF",
   "#993300", "#CCCC99", "#666666", "#FFCC66", "#6699CC", "#663366", "#9999CC", "#CCCCCC",
   "#669999", "#CCCC66", "#CC6600", "#9999FF", "#0066CC", "#99CCCC", "#999999", "#FFCC00",
   "#009999", "#FF9900", "#999966", "#66CCCC", "#339966", "#CCCC33", "#EDEDED"
)

theme_set(theme_classic(base_size=12, base_family="Avenir"))

Read in the OTU table

otu.table <- read.csv("TwoHarbors_OTU_table", row.names=1)
otu.table

Read metadata file. I added a column named SourceSink (in excel) that labels each sample as either a source or a sink. You could also easily do this in R.

metadata = read.csv("TwoHarbors_seq_metadata2.csv", header=TRUE, row.names=1)
metadata

In this dataset, the most upstream site is site9 (skunk creek transition or skunk creek upstream) skunk creek tributary is upstream (below a retention pond)

other skunk creek sites are downstream. most downstream is burlington bay

agate bay: both storm water outfall and wastewater outfall can be sources beach is most downstream

Sourcetracker wants the sample names as rows in an otu table and the metadatafile, which is the opposite of how phyloseq likes it.

To make this run faster the first time, let’s just look at families and rarefy to even depth using a quick scaling function

library(phyloseq)
tax.table <- read.csv("TwoHarbors_tax_table", header=TRUE, row.names=1)

# This just scales the otu counts to the lowest depth in the table. You should use the bootstrapped version if you have it.
mindepth <- min(colSums(otu.table))
s.otu.table <- data.frame(apply(otu.table, 2, function(x) round(x*mindepth/sum(x))))

#Replace single values with 0
s.otu.table[s.otu.table==1] <- 0

#Drop OTUs that are now all zeros
s.otu.table <- s.otu.table[which(rowSums(s.otu.table)>0),]

#Assemble phyloseq!
mothur.data <- merge_phyloseq(otu_table(s.otu.table, taxa_are_rows=TRUE), tax_table(as.matrix(tax.table)), sample_data(metadata))

mothur.data

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 6124 taxa and 78 samples ]
## sample_data() Sample Data:       [ 78 samples by 6 sample variables ]
## tax_table()   Taxonomy Table:    [ 6124 taxa by 6 taxonomic ranks ]

#To work with most public datasets of source, you'll need to agglomerate OTUs into some taxonomic rank. I tried with families and genera.
family.glom <- tax_glom(mothur.data, taxrank="Family")

genus.glom <- tax_glom(mothur.data, taxrank="Genus")


#Break apart the phyloseq object into agate bay and skunk creek only
ab.physeq <- prune_samples(sample_data(family.glom)$watershed == "Agate Bay", family.glom) %>% prune_taxa(taxa_sums(.) > 0,.)

sc.physeq <- prune_samples(sample_data(family.glom)$watershed == "Skunk Creek", family.glom)%>% prune_taxa(taxa_sums(.) > 0,.)

ab.physeq

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 363 taxa and 31 samples ]
## sample_data() Sample Data:       [ 31 samples by 6 sample variables ]
## tax_table()   Taxonomy Table:    [ 363 taxa by 6 taxonomic ranks ]

sc.physeq

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 400 taxa and 47 samples ]
## sample_data() Sample Data:       [ 47 samples by 6 sample variables ]
## tax_table()   Taxonomy Table:    [ 400 taxa by 6 taxonomic ranks ]

#make OTU tables
ab.family.otus <- data.frame(otu_table(ab.physeq))
sc.family.otus <- data.frame(otu_table(sc.physeq))

#make metadata files that only have sc/ab samples.
sc.metadata <- data.frame(sample_data(sc.physeq))
ab.metadata <- data.frame(sample_data(ab.physeq))

Skunk Creek

orientation fix from phyloseq style to sourcetracker style

t.otus <- t(as.matrix(sc.family.otus))

Find indices of source and sink samples and put their category type in the variable called ‘envs.’

train.ix <- which(sc.metadata$SourceSink=='source')
test.ix <- which(sc.metadata$SourceSink=='sink')
envs <- sc.metadata$Site_location

desc <- sc.metadata$Site_location

Change this line to reflect wherever you downloaded sourcetracker to

source('/volumes/dean/chunlab/sourcetracker/src/SourceTracker.r')

tune the alpha values using cross-validation (this is slow! like very slow!) Use 0.001 if pressed for time.

tune.results <- tune.st(t.otus[train.ix,], envs[train.ix])
alpha1 <- tune.results$best.alpha1
alpha2 <- tune.results$best.alpha2

alpha1
alpha2

Train sourcetracker using the source samples. Be unafraid of an error message that says ‘the condition has length > 1 and only the first element will be used.’ Seems to be an internal bug that doesn’t affect the outcome.

st <- sourcetracker(t.otus[train.ix,], envs[train.ix])

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000

Estimate source proportions in sink samples.

results <- predict(st,t.otus[test.ix,], alpha1=alpha1, alpha2=alpha2)

##                                        Skunk Cree     Skunk Cree     Unknown

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## ..........   1 of   30, depth= 1000:  0.03 (0.01)    0.16 (0.03) 0.80 (0.03)
## ..........   2 of   30, depth= 1000:  0.02 (0.01)    0.24 (0.06) 0.74 (0.06)
## ..........   3 of   30, depth= 1000:  0.02 (0.00)    0.04 (0.01) 0.94 (0.01)
## ..........   4 of   30, depth= 1000:  0.01 (0.00)    0.05 (0.02) 0.94 (0.02)
## ..........   5 of   30, depth= 1000:  0.02 (0.01)    0.05 (0.01) 0.93 (0.02)
## ..........   6 of   30, depth= 1000:  0.03 (0.01)    0.09 (0.02) 0.89 (0.02)
## ..........   7 of   30, depth= 1000:  0.01 (0.00)    0.04 (0.01) 0.95 (0.01)
## ..........   8 of   30, depth= 1000:  0.01 (0.01)    0.04 (0.01) 0.95 (0.01)
## ..........   9 of   30, depth= 1000:  0.01 (0.00)    0.03 (0.01) 0.96 (0.01)
## ..........  10 of   30, depth= 1000:  0.17 (0.04)    0.65 (0.04) 0.18 (0.02)
## ..........  11 of   30, depth= 1000:  0.32 (0.03)    0.67 (0.03) 0.01 (0.00)
## ..........  12 of   30, depth= 1000:  0.66 (0.04)    0.17 (0.04) 0.17 (0.03)
## ..........  13 of   30, depth= 1000:  0.13 (0.02)    0.08 (0.02) 0.79 (0.02)
## ..........  14 of   30, depth= 1000:  0.55 (0.04)    0.43 (0.04) 0.01 (0.00)
## ..........  15 of   30, depth= 1000:  0.22 (0.03)    0.76 (0.02) 0.02 (0.00)
## ..........  16 of   30, depth= 1000:  0.06 (0.02)    0.11 (0.03) 0.83 (0.02)
## ..........  17 of   30, depth= 1000:  0.04 (0.01)    0.08 (0.02) 0.87 (0.02)
## ..........  18 of   30, depth= 1000:  0.65 (0.04)    0.10 (0.04) 0.24 (0.02)
## ..........  19 of   30, depth= 1000:  0.76 (0.03)    0.06 (0.02) 0.18 (0.02)
## ..........  20 of   30, depth= 1000:  0.63 (0.03)    0.08 (0.02) 0.29 (0.01)
## ..........  21 of   30, depth= 1000:  0.12 (0.03)    0.86 (0.03) 0.02 (0.01)
## ..........  22 of   30, depth= 1000:  0.05 (0.02)    0.94 (0.02) 0.01 (0.00)
## ..........  23 of   30, depth= 1000:  0.41 (0.05)    0.54 (0.05) 0.05 (0.01)
## ..........  24 of   30, depth= 1000:  0.34 (0.05)    0.63 (0.05) 0.02 (0.01)
## ..........  25 of   30, depth= 1000:  0.23 (0.03)    0.75 (0.03) 0.02 (0.01)
## ..........  26 of   30, depth= 1000:  0.16 (0.02)    0.82 (0.02) 0.02 (0.01)
## ..........  27 of   30, depth= 1000:  0.12 (0.03)    0.05 (0.01) 0.83 (0.03)
## ..........  28 of   30, depth= 1000:  0.85 (0.03)    0.10 (0.03) 0.05 (0.01)
## ..........  29 of   30, depth= 1000:  0.35 (0.03)    0.08 (0.02) 0.57 (0.03)
## ..........  30 of   30, depth= 1000:  0.05 (0.01)    0.08 (0.02) 0.88 (0.02)

This part validates the models we made using the original training data.

results.train <- predict(st, alpha1=alpha1, alpha2=alpha2)

## ndraws=10, V=3, T=400, N=17

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........   1 of   17, depth= 1000:  0.03 (0.01)    0.97 (0.01) 0.00 (0.00)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........   2 of   17, depth= 1000:  0.06 (0.02)    0.93 (0.02) 0.00 (0.00)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........   3 of   17, depth= 1000:  0.35 (0.03)    0.62 (0.02) 0.03 (0.01)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........   4 of   17, depth= 1000:  0.21 (0.03)    0.78 (0.03) 0.01 (0.00)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........   5 of   17, depth= 1000:  0.02 (0.01)    0.98 (0.01) 0.00 (0.00)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........   6 of   17, depth= 1000:  0.01 (0.01)    0.98 (0.01) 0.00 (0.00)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........   7 of   17, depth= 1000:  0.13 (0.03)    0.86 (0.03) 0.01 (0.00)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........   8 of   17, depth= 1000:  0.15 (0.03)    0.85 (0.03) 0.01 (0.00)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........   9 of   17, depth= 1000:  0.35 (0.03)    0.09 (0.03) 0.56 (0.03)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........  10 of   17, depth= 1000:  0.45 (0.03)    0.55 (0.03) 0.00 (0.00)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........  11 of   17, depth= 1000:  0.06 (0.02)    0.93 (0.02) 0.00 (0.00)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........  12 of   17, depth= 1000:  0.94 (0.01)    0.05 (0.01) 0.01 (0.00)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........  13 of   17, depth= 1000:  0.95 (0.01)    0.04 (0.01) 0.01 (0.00)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........  14 of   17, depth= 1000:  0.95 (0.01)    0.03 (0.01) 0.01 (0.01)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........  15 of   17, depth= 1000:  0.94 (0.02)    0.05 (0.02) 0.01 (0.01)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........  16 of   17, depth= 1000:  0.97 (0.01)    0.02 (0.01) 0.01 (0.00)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........  17 of   17, depth= 1000:  0.96 (0.02)    0.03 (0.02) 0.01 (0.00)

Here are some (really ugly) pie charts that sourcetracker will plot for you. You can also try type = ‘bar’ or ‘dist’ though I think really those outputs are not easily customizable.

# plot results
labels <- desc
plot(results, labels[test.ix], type='pie')

# plot results with legend
plot(results, labels[test.ix], type='pie', include.legend=TRUE)
plot(results.train, labels[train.ix], type='pie', include.legend=TRUE)

Instead of using those plots, you can extract the results like so and then make all your plots in either base R or ggplot just like you’ve already learned to do.

downstream <- data.frame(results$proportions)
downstream$id <- row.names(downstream)
meltdown <- melt(downstream, id.vars=c("id"))

sc.metadata

colnames(meltdown) <- c("id", "Upstream.Site", "Proportion")
meltdown$Downstream.Site <- sc.metadata[as.vector(meltdown$id), "Site_location"]
meltdown$Date <- sc.metadata[as.vector(meltdown$id), "Date_sampled"]
meltdown$Category <- sc.metadata[as.vector(meltdown$id), "sampling_category"]

#Some manipulation here to create more variables. You can also do this in excel with your original metadata file before you start any of this if you'd rather. 
meltdown$Month <- sapply(meltdown$Date, function(x) strsplit(x, "/")[[1]][1])
meltdown

theme_set(theme_classic(base_size=10, base_family="Avenir"))

ggplot(meltdown, aes(x=id, y=Proportion, fill=Upstream.Site)) + geom_bar(stat="identity") + facet_wrap(~Downstream.Site+Month, scales="free") + theme(axis.text.x = element_blank()) + scale_fill_manual(values=m.g.colors, name="Source Sites", labels=c("Skunk Creek Transition", "Skunk Creek Tributary", "Unknown")) + xlab("")

Save a high quality version.

ggsave("skunkcreek_family_example_fwrap.png", width=7, height=5, dpi=300)

Genus Skunk Creek

This is the same analysis as above but with Genera rather than family and some fancier plotting at the end.

sc.physeq <- prune_samples(sample_data(genus.glom)$watershed == "Skunk Creek", genus.glom)%>% prune_taxa(taxa_sums(.) > 0,.)

sc.physeq

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 666 taxa and 47 samples ]
## sample_data() Sample Data:       [ 47 samples by 6 sample variables ]
## tax_table()   Taxonomy Table:    [ 666 taxa by 6 taxonomic ranks ]

sc.gen.otus <- data.frame(otu_table(sc.physeq))

sc.metadata <- data.frame(sample_data(sc.physeq))

t.otus <- t(as.matrix(sc.gen.otus))
train.ix <- which(sc.metadata$SourceSink=='source')
test.ix <- which(sc.metadata$SourceSink=='sink')
envs <- sc.metadata$Site_location

desc <- sc.metadata$Site_location

tune.results <- tune.st(t.otus[train.ix,], envs[train.ix])
alpha1 <- tune.results$best.alpha1
alpha2 <- tune.results$best.alpha2

alpha1
alpha2

st <- sourcetracker(t.otus[train.ix,], envs[train.ix])

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000

Estimate source proportions in test data

results <- predict(st,t.otus[test.ix,], alpha1=alpha1, alpha2=alpha2)

##                                        Skunk Cree     Skunk Cree     Unknown

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## ..........   1 of   30, depth= 1000:  0.03 (0.01)    0.17 (0.03) 0.80 (0.03)
## ..........   2 of   30, depth= 1000:  0.03 (0.01)    0.18 (0.02) 0.79 (0.02)
## ..........   3 of   30, depth= 1000:  0.01 (0.01)    0.06 (0.02) 0.92 (0.02)
## ..........   4 of   30, depth= 1000:  0.02 (0.01)    0.07 (0.02) 0.91 (0.02)
## ..........   5 of   30, depth= 1000:  0.02 (0.00)    0.05 (0.01) 0.93 (0.01)
## ..........   6 of   30, depth= 1000:  0.02 (0.01)    0.09 (0.02) 0.89 (0.02)
## ..........   7 of   30, depth= 1000:  0.01 (0.00)    0.05 (0.02) 0.94 (0.02)
## ..........   8 of   30, depth= 1000:  0.01 (0.00)    0.04 (0.01) 0.95 (0.01)
## ..........   9 of   30, depth= 1000:  0.01 (0.00)    0.04 (0.01) 0.96 (0.01)
## ..........  10 of   30, depth= 1000:  0.23 (0.04)    0.60 (0.03) 0.17 (0.01)
## ..........  11 of   30, depth= 1000:  0.38 (0.04)    0.60 (0.04) 0.03 (0.00)
## ..........  12 of   30, depth= 1000:  0.71 (0.03)    0.16 (0.03) 0.13 (0.01)
## ..........  13 of   30, depth= 1000:  0.20 (0.03)    0.10 (0.02) 0.70 (0.04)
## ..........  14 of   30, depth= 1000:  0.54 (0.02)    0.44 (0.02) 0.02 (0.01)
## ..........  15 of   30, depth= 1000:  0.28 (0.03)    0.70 (0.02) 0.01 (0.01)
## ..........  16 of   30, depth= 1000:  0.05 (0.01)    0.10 (0.03) 0.85 (0.03)
## ..........  17 of   30, depth= 1000:  0.05 (0.01)    0.09 (0.03) 0.86 (0.03)
## ..........  18 of   30, depth= 1000:  0.70 (0.02)    0.05 (0.01) 0.25 (0.01)
## ..........  19 of   30, depth= 1000:  0.81 (0.02)    0.06 (0.01) 0.13 (0.01)
## ..........  20 of   30, depth= 1000:  0.76 (0.03)    0.04 (0.01) 0.20 (0.02)
## ..........  21 of   30, depth= 1000:  0.24 (0.05)    0.74 (0.05) 0.02 (0.00)
## ..........  22 of   30, depth= 1000:  0.12 (0.02)    0.87 (0.02) 0.01 (0.00)
## ..........  23 of   30, depth= 1000:  0.59 (0.03)    0.37 (0.04) 0.04 (0.01)
## ..........  24 of   30, depth= 1000:  0.48 (0.04)    0.50 (0.04) 0.02 (0.00)
## ..........  25 of   30, depth= 1000:  0.21 (0.05)    0.70 (0.03) 0.09 (0.02)
## ..........  26 of   30, depth= 1000:  0.18 (0.02)    0.80 (0.02) 0.02 (0.01)
## ..........  27 of   30, depth= 1000:  0.89 (0.02)    0.06 (0.02) 0.06 (0.01)
## ..........  28 of   30, depth= 1000:  0.84 (0.03)    0.10 (0.03) 0.06 (0.00)
## ..........  29 of   30, depth= 1000:  0.42 (0.05)    0.08 (0.02) 0.50 (0.05)
## ..........  30 of   30, depth= 1000:  0.06 (0.01)    0.03 (0.01) 0.91 (0.02)

results.train <- predict(st, alpha1=alpha1, alpha2=alpha2)

## ndraws=10, V=3, T=666, N=17

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........   1 of   17, depth= 1000:  0.02 (0.01)    0.98 (0.01) 0.00 (0.00)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........   2 of   17, depth= 1000:  0.10 (0.02)    0.90 (0.02) 0.00 (0.00)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........   3 of   17, depth= 1000:  0.23 (0.03)    0.76 (0.03) 0.01 (0.01)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........   4 of   17, depth= 1000:  0.07 (0.02)    0.92 (0.02) 0.01 (0.00)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........   5 of   17, depth= 1000:  0.01 (0.01)    0.98 (0.01) 0.00 (0.00)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........   6 of   17, depth= 1000:  0.01 (0.00)    0.98 (0.00) 0.00 (0.00)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........   7 of   17, depth= 1000:  0.08 (0.02)    0.91 (0.02) 0.01 (0.00)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........   8 of   17, depth= 1000:  0.08 (0.02)    0.91 (0.02) 0.01 (0.00)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........   9 of   17, depth= 1000:  0.53 (0.05)    0.24 (0.06) 0.23 (0.02)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........  10 of   17, depth= 1000:  0.85 (0.04)    0.15 (0.04) 0.00 (0.00)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........  11 of   17, depth= 1000:  0.08 (0.01)    0.91 (0.01) 0.01 (0.00)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........  12 of   17, depth= 1000:  0.97 (0.01)    0.03 (0.01) 0.00 (0.00)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........  13 of   17, depth= 1000:  0.97 (0.01)    0.03 (0.01) 0.01 (0.00)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........  14 of   17, depth= 1000:  0.97 (0.01)    0.02 (0.01) 0.01 (0.00)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........  15 of   17, depth= 1000:  0.95 (0.01)    0.04 (0.01) 0.01 (0.00)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........  16 of   17, depth= 1000:  0.98 (0.01)    0.01 (0.00) 0.01 (0.00)

## Warning in if (!is.element(class(x), c("matrix", "data.frame", "array"))) x <-
## matrix(x, : the condition has length > 1 and only the first element will be used

## Rarefying training data at 1000
## ..........  17 of   17, depth= 1000:  0.97 (0.01)    0.02 (0.01) 0.02 (0.01)

downstream <- data.frame(results$proportions)
downstream$id <- row.names(downstream)
meltdown <- melt(downstream, id.vars=c("id"))

sc.metadata

colnames(meltdown) <- c("id", "Upstream.Site", "Proportion")
meltdown$Downstream.Site <- sc.metadata[as.vector(meltdown$id), "Site_location"]
meltdown$Date <- sc.metadata[as.vector(meltdown$id), "Date_sampled"]
meltdown$Category <- sc.metadata[as.vector(meltdown$id), "sampling_category"]

meltdown$Month <- as.factor(sapply(meltdown$Date, function(x) strsplit(x, "/")[[1]][1]))
levels(meltdown$Month) <- c("June", "July", "August")
meltdown$Day<- as.factor(sapply(meltdown$Date, function(x) strsplit(x, "/")[[1]][2]))

meltdown$Rep <- as.factor(sapply(meltdown$id, function(x) strsplit(x, "_")[[1]][3]))
levels(meltdown$Rep) <- c("A", "AA", "B")

meltdown$Category <- as.factor(meltdown$Category)
levels(meltdown$Category) <- c("B", "E")
 
meltdown$SID <- as.factor(sapply(meltdown$id, function(x) strsplit(x, "_")[[1]][5]))

meltdown$TypeRep <- paste(meltdown$Category, meltdown$Rep, meltdown$Day, sep=".")

theme_set(theme_classic(base_size=10, base_family="Avenir"))

ggplot(meltdown, aes(x=TypeRep, y=Proportion, fill=Upstream.Site)) + geom_bar(stat="identity") + facet_wrap(~Downstream.Site+Month, scales="free_x") + scale_fill_manual(values=m.g.colors, name="Source Sites", labels=c("Skunk Creek Transition", "Skunk Creek Tributary", "Unknown")) + xlab("")

ggplot(meltdown, aes(x=Category, y=Proportion, fill=Upstream.Site)) + geom_bar(stat="identity") + facet_grid(rows=vars(Downstream.Site), cols=vars(Month), space="free", scales="free_x") + scale_fill_manual(values=m.g.colors, name="Source Sites", labels=c("Skunk Creek Transition", "Skunk Creek Tributary", "Unknown")) + xlab("")

ggsave("skunkcreek_genera_example_fwrap.png", width=9, height=5, dpi=300)

Knights, Dan, Justin Kuczynski, Emily S Charlson, Jesse Zaneveld, Michael C Mozer, Ronald G Collman, Frederic D Bushman, Rob Knight, and Scott T Kelley. 2011. “Bayesian Community-Wide Culture-Independent Microbial Source Tracking.” Nature Methods 8 (9): 761–63.

R Notebook

Set Up

Skunk Creek

Genus Skunk Creek