Open janstett opened 4 years ago
library(tidyverse)
library(colorspace)
library(RColorBrewer)
library(ggforce)
library(egg)
#> Loading required package: gridExtra
#>
#> Attaching package: 'gridExtra'
#> The following object is masked from 'package:dplyr':
#>
#> combine
#read in data
#Set Working Directory
setwd("C:/Users/Julia/Dropbox/A-PhD_WIPs/B_OMZ_Cataloug/Counts")
#Read in Global Dataset
OMZ<-data.frame(read.csv("Update_Global_SAGs_Sep_03_2019.csv", header =TRUE))
#This reads in a list of the sites with Lat, Long, and O2 phenotype, you might be able to skip this step
Sites_O2<-read.csv("Sites.csv", header=TRUE)
Sites<-as.data.frame(Sites_O2[,1])
oxytype<-Sites_O2[,5]
oxytype<-as.data.frame(unique(oxytype))
#Select unique taxonomies
Tax_list<-unique(OMZ %>% select(Taxonomy) %>% arrange (Taxonomy))
#This reads in a list of the sites with Lat, Long, and O2 phenotype, you might be able to skip this step
Sites_O2<-read.csv("Sites.csv", header=TRUE)
Sites<-as.data.frame(Sites_O2[,1])
oxytype<-Sites_O2$Oxy_Type
oxytype<-as.data.frame(unique(oxytype))
#Variable declaration
Plot_labs<-c()
SSU_plot<-data.frame()
ITS_plot<-data.frame()
WGS_plot<-data.frame()
Tax_list_labs<-read.csv("Tax_list_labs_Apr_25_2019.csv", header = FALSE)
for (i in 1: dim(OMZ)[1]){
for (j in 1:dim (Tax_list_labs)[1]){
pattern<-as.character(Tax_list_labs[j,1])
match<-grepl(pattern, as.character(OMZ$Taxonomy[i]))
if (match==TRUE){
OMZ$Plot_Taxa[i]<-pattern
Plot_labs<-pattern
}
}
}
#Separate the global data set into 3 data frames based on how taxonomy was assigned
SSU<-OMZ %>% filter(Primary_Tax_Method=="16S")
SSU_Tax_list<-unique(SSU %>% select(Taxonomy) %>% arrange (Taxonomy))
ITS<-OMZ %>% filter(Primary_Tax_Method=="ITS")
ITS_Tax_list<-unique(ITS %>% select(Plot_Taxa) %>% arrange (Plot_Taxa))
WGS<-OMZ %>% filter(Primary_Tax_Method=="WGS")
WGS_Tax_list<-unique(WGS %>% select(Taxonomy) %>% arrange (Taxonomy))
##################################################
#Completentess/Contamination Plots
#pdf("CC_Plots.pdf")
Seq_OMZ<-OMZ %>% filter(Status=="Sequenced", Contamination<=10)
MDA_Seq<-Seq_OMZ %>% filter (WGA_Approach=="MDA")
WGA_Seq<-Seq_OMZ %>% filter (WGA_Approach=="WGA-X")
Amp_Method<-ggplot(Seq_OMZ, aes(x=Completeness, y=Contamination))+
geom_point(na.rm = TRUE, aes(color=WGA_Approach))+
facet_wrap(.~WGA_Approach)
###############################################
#Total Completeness, Contamination, Assembly Length Violins
p1_Comp<-ggplot(Seq_OMZ, aes(x=Plot_Taxa, y=Completeness)) + geom_boxplot(na.rm = TRUE, fill="white") +
#theme(axis.text.x = element_text(angle=45, hjust = 1))+
xlab("Phylum")+
ylab("% Completeness")+
coord_flip()+ scale_x_discrete(limits = rev(Tax_list_labs[,1]))
p2_Contam<-ggplot(Seq_OMZ, aes(x=Plot_Taxa, y=Contamination)) + geom_boxplot(na.rm = TRUE, fill="grey")+
# theme(axis.text.x = element_text(angle=45, hjust = 1)) +
ylab("% Contamination")+
coord_flip()+ scale_x_discrete(limits = rev(Tax_list_labs[,1]))
p3_Len<-ggplot(Seq_OMZ, aes(x=Plot_Taxa, y=Assembly_Length_MBP)) +
geom_boxplot(na.rm = TRUE, colour="black", fill="grey30", outlier.colour = "black")+
# theme(axis.text.x = element_text(angle=45, hjust = 1)) +
ylab("Assembly Length (Mbp)")+
coord_flip()+ scale_x_discrete(limits = rev(Tax_list_labs[,1]))
ggarrange(p1_Comp,
p2_Contam +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_blank() ),
p3_Len +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_blank() ), nrow=1)
##########################################################################
#Split Between MDA/MDA-X
p1_Comp_MDA<-ggplot(MDA_Seq, aes(x=Plot_Taxa, y=Completeness)) + geom_boxplot(na.rm = TRUE, fill="red") +
#theme(axis.text.x = element_text(angle=45, hjust = 1))+
xlab("MDA")+
ylab("% Completeness")+
coord_flip()+ scale_x_discrete(limits = rev(Tax_list_labs[,1]))
p2_Contam_MDA<-ggplot(MDA_Seq, aes(x=Plot_Taxa, y=Contamination)) + geom_boxplot(na.rm = TRUE, fill="red")+
# theme(axis.text.x = element_text(angle=45, hjust = 1)) +
ylab("% Contamination")+
scale_y_continuous(limits=c(0,10), breaks = seq(0, 10, by = 2))+
coord_flip()+ scale_x_discrete(limits = rev(Tax_list_labs[,1]))
p3_Len_MDA<-ggplot(MDA_Seq, aes(x=Plot_Taxa, y=Assembly_Length_MBP)) +
geom_boxplot(na.rm = TRUE, fill="red")+
# theme(axis.text.x = element_text(angle=45, hjust = 1)) +
ylab("Assembly Length (Mbp)")+
scale_y_continuous(limits=c(0,6), breaks = seq(0, 6, by = 2))+
coord_flip()+ scale_x_discrete(limits = rev(Tax_list_labs[,1]))
p1_Comp_WGA<-ggplot(WGA_Seq, aes(x=Plot_Taxa, y=Completeness)) + geom_boxplot(na.rm = TRUE, fill="blue") +
#theme(axis.text.x = element_text(angle=45, hjust = 1))+
xlab("MDA-X/WGA-X")+
ylab("% Completeness")+
coord_flip()+ scale_x_discrete(limits = rev(Tax_list_labs[,1]))
p2_Contam_WGA<-ggplot(WGA_Seq, aes(x=Plot_Taxa, y=Contamination)) + geom_boxplot(na.rm = TRUE, fill="blue")+
# theme(axis.text.x = element_text(angle=45, hjust = 1)) +
ylab("% Contamination")+
scale_y_continuous(limits=c(0,10), breaks = seq(0, 10, by = 2))+
coord_flip()+ scale_x_discrete(limits = rev(Tax_list_labs[,1]))
p3_Len_WGA<-ggplot(WGA_Seq, aes(x=Plot_Taxa, y=Assembly_Length_MBP)) +
geom_boxplot(na.rm = TRUE, fill="blue")+
# theme(axis.text.x = element_text(angle=45, hjust = 1)) +
ylab("Assembly Length (Mbp)")+
scale_y_continuous(limits=c(0,6), breaks = seq(0, 6, by = 2))+
coord_flip()+ scale_x_discrete(limits = rev(Tax_list_labs[,1]))
ggarrange(p1_Comp_MDA,
p2_Contam_MDA +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_blank() ),
p3_Len_MDA +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_blank() ),
p1_Comp_WGA,
p2_Contam_WGA +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_blank() ),
p3_Len_WGA +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_blank() ),
nrow=2)
Created on 2019-12-03 by the reprex package (v0.3.0)
Created on 2019-11-26 by the reprex package (v0.3.0)