janstett / Testing_Repo

0 stars 0 forks source link

Test Reprex #1

Open janstett opened 4 years ago

janstett commented 4 years ago
library(tidyverse)

setwd("C:/Users/Julia/Dropbox/A-PhD_WIPs")

SI<-data.frame(read.csv("SI_Sample_Sequencing_Data.csv"), header=TRUE)

Years<-unique(SI$Year)

depths<-sort(unique(SI$sample_depth))

SI_MetaG<-SI %>% filter (datatype=="metagenomes")
SI_MetaT<-SI %>% filter (datatype=="metatranscriptomes")

ggplot (SI_MetaG, aes(x=cruise_id, y=sample_depth))+
  geom_point(aes(size=5))+
  scale_x_discrete(limits=c(unique(SI$cruise_id)))


ggplot (SI_MetaT, aes(x=cruise_id, y=sample_depth))+
  geom_point(aes(size=5))+
  scale_x_discrete(limits=c(unique(SI$cruise_id)))

Created on 2019-11-26 by the reprex package (v0.3.0)

janstett commented 4 years ago
library(tidyverse)
library(colorspace)
library(RColorBrewer)
library(ggforce)
library(egg)
#> Loading required package: gridExtra
#> 
#> Attaching package: 'gridExtra'
#> The following object is masked from 'package:dplyr':
#> 
#>     combine

#read in data
#Set Working Directory
setwd("C:/Users/Julia/Dropbox/A-PhD_WIPs/B_OMZ_Cataloug/Counts")

#Read in Global Dataset
OMZ<-data.frame(read.csv("Update_Global_SAGs_Sep_03_2019.csv", header =TRUE))

#This reads in a list of the sites with Lat, Long, and O2 phenotype, you might be able to skip this step
Sites_O2<-read.csv("Sites.csv", header=TRUE)
Sites<-as.data.frame(Sites_O2[,1])
oxytype<-Sites_O2[,5]
oxytype<-as.data.frame(unique(oxytype))

#Select unique taxonomies
Tax_list<-unique(OMZ %>% select(Taxonomy) %>% arrange (Taxonomy))

#This reads in a list of the sites with Lat, Long, and O2 phenotype, you might be able to skip this step
Sites_O2<-read.csv("Sites.csv", header=TRUE)
Sites<-as.data.frame(Sites_O2[,1])
oxytype<-Sites_O2$Oxy_Type
oxytype<-as.data.frame(unique(oxytype))

#Variable declaration
Plot_labs<-c()

SSU_plot<-data.frame()

ITS_plot<-data.frame()

WGS_plot<-data.frame()

Tax_list_labs<-read.csv("Tax_list_labs_Apr_25_2019.csv", header = FALSE)

for (i in 1: dim(OMZ)[1]){
  for (j in 1:dim (Tax_list_labs)[1]){
    pattern<-as.character(Tax_list_labs[j,1])
    match<-grepl(pattern, as.character(OMZ$Taxonomy[i]))

    if (match==TRUE){
      OMZ$Plot_Taxa[i]<-pattern
      Plot_labs<-pattern
    }
  }
}

#Separate the global data set into 3 data frames based on how taxonomy was assigned
SSU<-OMZ %>% filter(Primary_Tax_Method=="16S")
SSU_Tax_list<-unique(SSU %>% select(Taxonomy) %>% arrange (Taxonomy))

ITS<-OMZ %>% filter(Primary_Tax_Method=="ITS")
ITS_Tax_list<-unique(ITS %>% select(Plot_Taxa) %>% arrange (Plot_Taxa))

WGS<-OMZ %>% filter(Primary_Tax_Method=="WGS")
WGS_Tax_list<-unique(WGS %>% select(Taxonomy) %>% arrange (Taxonomy))

##################################################
#Completentess/Contamination Plots
#pdf("CC_Plots.pdf")
Seq_OMZ<-OMZ %>% filter(Status=="Sequenced", Contamination<=10)
MDA_Seq<-Seq_OMZ %>% filter (WGA_Approach=="MDA")
WGA_Seq<-Seq_OMZ %>% filter (WGA_Approach=="WGA-X")

Amp_Method<-ggplot(Seq_OMZ, aes(x=Completeness, y=Contamination))+ 
  geom_point(na.rm = TRUE, aes(color=WGA_Approach))+
  facet_wrap(.~WGA_Approach)

###############################################
#Total Completeness, Contamination, Assembly Length Violins

p1_Comp<-ggplot(Seq_OMZ, aes(x=Plot_Taxa, y=Completeness)) + geom_boxplot(na.rm = TRUE, fill="white") +
  #theme(axis.text.x = element_text(angle=45, hjust = 1))+
  xlab("Phylum")+
  ylab("% Completeness")+
  coord_flip()+ scale_x_discrete(limits = rev(Tax_list_labs[,1]))

p2_Contam<-ggplot(Seq_OMZ, aes(x=Plot_Taxa, y=Contamination)) + geom_boxplot(na.rm = TRUE, fill="grey")+
  # theme(axis.text.x = element_text(angle=45, hjust = 1)) +  
  ylab("% Contamination")+
  coord_flip()+ scale_x_discrete(limits = rev(Tax_list_labs[,1]))

p3_Len<-ggplot(Seq_OMZ, aes(x=Plot_Taxa, y=Assembly_Length_MBP)) + 
  geom_boxplot(na.rm = TRUE, colour="black", fill="grey30", outlier.colour = "black")+
  #  theme(axis.text.x = element_text(angle=45, hjust = 1)) +  
  ylab("Assembly Length (Mbp)")+
  coord_flip()+ scale_x_discrete(limits = rev(Tax_list_labs[,1]))

ggarrange(p1_Comp, 
          p2_Contam + 
            theme(axis.text.y = element_blank(),
                  axis.ticks.y = element_blank(),
                  axis.title.y = element_blank() ), 
          p3_Len + 
            theme(axis.text.y = element_blank(),
                  axis.ticks.y = element_blank(),
                  axis.title.y = element_blank() ), nrow=1)

##########################################################################
#Split Between MDA/MDA-X
p1_Comp_MDA<-ggplot(MDA_Seq, aes(x=Plot_Taxa, y=Completeness)) + geom_boxplot(na.rm = TRUE, fill="red") +
  #theme(axis.text.x = element_text(angle=45, hjust = 1))+
  xlab("MDA")+
  ylab("% Completeness")+
  coord_flip()+ scale_x_discrete(limits = rev(Tax_list_labs[,1]))

p2_Contam_MDA<-ggplot(MDA_Seq, aes(x=Plot_Taxa, y=Contamination)) + geom_boxplot(na.rm = TRUE, fill="red")+
  # theme(axis.text.x = element_text(angle=45, hjust = 1)) +  
  ylab("% Contamination")+
  scale_y_continuous(limits=c(0,10), breaks = seq(0, 10, by = 2))+
  coord_flip()+ scale_x_discrete(limits = rev(Tax_list_labs[,1]))

p3_Len_MDA<-ggplot(MDA_Seq, aes(x=Plot_Taxa, y=Assembly_Length_MBP)) + 
  geom_boxplot(na.rm = TRUE, fill="red")+
  #  theme(axis.text.x = element_text(angle=45, hjust = 1)) +  
  ylab("Assembly Length (Mbp)")+
  scale_y_continuous(limits=c(0,6), breaks = seq(0, 6, by = 2))+
  coord_flip()+ scale_x_discrete(limits = rev(Tax_list_labs[,1]))

p1_Comp_WGA<-ggplot(WGA_Seq, aes(x=Plot_Taxa, y=Completeness)) + geom_boxplot(na.rm = TRUE, fill="blue") +
  #theme(axis.text.x = element_text(angle=45, hjust = 1))+
  xlab("MDA-X/WGA-X")+
  ylab("% Completeness")+
  coord_flip()+ scale_x_discrete(limits = rev(Tax_list_labs[,1]))

p2_Contam_WGA<-ggplot(WGA_Seq, aes(x=Plot_Taxa, y=Contamination)) + geom_boxplot(na.rm = TRUE, fill="blue")+
  # theme(axis.text.x = element_text(angle=45, hjust = 1)) +  
  ylab("% Contamination")+
  scale_y_continuous(limits=c(0,10), breaks = seq(0, 10, by = 2))+
  coord_flip()+ scale_x_discrete(limits = rev(Tax_list_labs[,1]))

p3_Len_WGA<-ggplot(WGA_Seq, aes(x=Plot_Taxa, y=Assembly_Length_MBP)) + 
  geom_boxplot(na.rm = TRUE,  fill="blue")+
  #  theme(axis.text.x = element_text(angle=45, hjust = 1)) +  
  ylab("Assembly Length (Mbp)")+
  scale_y_continuous(limits=c(0,6), breaks = seq(0, 6, by = 2))+
  coord_flip()+ scale_x_discrete(limits = rev(Tax_list_labs[,1]))

ggarrange(p1_Comp_MDA, 
          p2_Contam_MDA + 
            theme(axis.text.y = element_blank(),
                  axis.ticks.y = element_blank(),
                  axis.title.y = element_blank() ),

          p3_Len_MDA + 
            theme(axis.text.y = element_blank(),
                  axis.ticks.y = element_blank(),
                  axis.title.y = element_blank() ), 

          p1_Comp_WGA,

          p2_Contam_WGA + 
            theme(axis.text.y = element_blank(),
                  axis.ticks.y = element_blank(),
                  axis.title.y = element_blank() ), 

          p3_Len_WGA + 
            theme(axis.text.y = element_blank(),
                  axis.ticks.y = element_blank(),
                  axis.title.y = element_blank() ),
          nrow=2)

Created on 2019-12-03 by the reprex package (v0.3.0)