Clean up epigenetic tools

Additionally, we should add the capability to generate fragment size distribution plots. This could be done either with an R script or with deepTools (bamPEFragmentSize): https://deeptools.readthedocs.io/en/develop/content/tools/bamPEFragmentSize.html
library(tidyr)
library(stringr)
library(tidyverse)
library(apird)

# Run everything from the processed-project directory: the insertSizes/ input
# folder is located through getwd() further down, and the PDF written by
# ggsave() uses a relative path, so both resolve against this location.
# NOTE(review): hard-coded absolute path; it has to be edited before the script
# can be run for another project or on another machine.
setwd("/Volumes/Bioinformatics/pipeline/Illumina/220711_M06794_0010_ADH8FDXX/Project_P425-3Processed_globus_220712")

# Set the session-wide ggplot2 theme used by every plot drawn after this point:
# white panels with a black frame, no grid lines, black axis text/ticks and
# compact facet strips.
# NOTE(review): theme() builds a partial theme here; theme_update() may be the
# intended call if the goal is to tweak the current default -- confirm.
theme_set(
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for rect/line elements in
    # favour of `linewidth`; `size` is kept so older installs still work.
    panel.border = element_rect(colour = "black", fill = NA, size = 1),
    axis.text = element_text(colour = "black"),
    axis.ticks = element_line(colour = "black"),
    legend.key = element_blank(),
    text = element_text(size = 20),
    strip.text.x = element_text(size = 10, margin = margin(b = 2, t = 2)),
    strip.background = element_rect(fill = "white", colour = "black"),
    # The original ended with a trailing comma, i.e. an empty extra argument.
    panel.background = element_rect(fill = "white")
  )
)

# Fetch the libraries of project P425-3 together with their annotation and
# metrics (getProjectLibs / getAnno / getMetrics are presumably provided by
# apird -- they are not defined in this script).
libs <- getProjectLibs("P425-3")

anno <- getAnno(libs)
metrics <- getMetrics(libs)

# One row per library; full join keeps libraries present in only one table.
design <- full_join(anno, metrics, by = "libid")

# Per-library fragment size tables live in insertSizes/ under the working
# directory.
fragmentDir <- file.path(getwd(), "insertSizes")
fragmentFiles <- list.files(fragmentDir, full.names = TRUE)

# Fail early with a clear message instead of an obscure merge() error later on
# when the directory is missing or empty.
if (length(fragmentFiles) == 0) {
  stop("No fragment size files found in ", fragmentDir, call. = FALSE)
}

#' Read one fragment size table and normalise it for plotting
#'
#' @param fragmentFile Path to a header-less, whitespace-delimited table with
#'   two columns: a read count and a fragment length (`abs()` is applied to the
#'   length, so signed values are accepted). The library id is the first
#'   `lib<digits>` match in the path, or `NA` when there is none.
#' @param annotation Unused; kept so that existing calls keep working.
#' @param includeShortFrags When `FALSE` (the default) only rows with
#'   `fragLen > 15` are kept, which also drops zero and negative lengths.
#' @return A data.frame with one row per retained input row and the columns
#'   `nReads`, `fragLen`, `shortReads` (total count of rows with
#'   `fragLen == 0`, taken before filtering; 0 when there are none),
#'   `fragmentLengths` (`abs(fragLen)`), `normReads` (share of retained reads,
#'   scaled by 10^3), `nFragments` (sum of retained counts) and `libid`.
unpackFragData <- function(fragmentFile, annotation, includeShortFrags = FALSE) {
  libMatch <- regexpr("lib[0-9]+", fragmentFile)
  libid <- if (isTRUE(libMatch > 0)) {
    regmatches(fragmentFile, libMatch)
  } else {
    NA_character_
  }
  cat(sprintf("Counting fragments of %s... \n", libid))

  fragmentDistribution <- read.table(fragmentFile)
  colnames(fragmentDistribution) <- c("nReads", "fragLen")

  # Sum rather than index directly: the table may contain no row (or several
  # rows) with fragLen == 0, and a zero-length replacement would abort here.
  zeroRows <- which(fragmentDistribution$fragLen == 0)
  fragmentDistribution$shortReads <- rep(
    sum(fragmentDistribution$nReads[zeroRows]),
    nrow(fragmentDistribution)
  )

  if (!includeShortFrags) {
    # which() drops NA comparisons, matching dplyr::filter() semantics.
    keepRows <- which(fragmentDistribution$fragLen > 15)
    fragmentDistribution <- fragmentDistribution[keepRows, , drop = FALSE]
    rownames(fragmentDistribution) <- NULL
  }

  totalReads <- sum(fragmentDistribution$nReads)
  nRows <- nrow(fragmentDistribution)

  fragmentDistribution$fragmentLengths <- abs(fragmentDistribution$fragLen)
  fragmentDistribution$normReads <-
    fragmentDistribution$nReads / totalReads * 10^3

  # rep() keeps these assignments valid when every row was filtered out.
  fragmentDistribution$nFragments <- rep(totalReads, nRows)
  fragmentDistribution$libid <- rep(libid, nRows)

  fragmentDistribution
}

# Parse every fragment size file and stack the per-library tables. The
# annotation table is `anno`; the original passed an undefined `annotation`
# object, which only worked because the argument is never evaluated.
fragmentList <- lapply(fragmentFiles, unpackFragData, annotation = anno)
fragmentDistributions <- bind_rows(fragmentList)

# Attach annotation and metrics. merge() performs an inner join by default, so
# libraries without a row in `design` are dropped.
fragmentDistributions <- merge(fragmentDistributions, design, by = "libid")

# Fragment length window shown on the x axis (exclusive bounds), in bp.
xrange <- c(0, 750)

fragmentDistributionPlot <- fragmentDistributions %>%
  dplyr::filter(fragmentLengths > xrange[1] & fragmentLengths < xrange[2]) %>%
  ggplot(aes(x = fragmentLengths, y = normReads, color = libid)) +
  geom_line() +
  xlim(xrange) +
  labs(
    x = "Fragment length, bp",
    y = expression(paste("Normalized read density (a.u.)")),
    color = ""
  ) +
  theme(
    text = element_text(size = 14),
    legend.position = "right",
    legend.direction = "vertical"
  )
# Optional layers kept from the original for reference:
#   scale_color_manual(values = colors.sampleName)
#   facet_wrap(. ~ libid, scales = "free")

print(fragmentDistributionPlot)

# Written relative to the current working directory.
ggsave(
  "fragmentSizeDistributions.pdf",
  fragmentDistributionPlot,
  width = 7,
  height = 4
)
BenaroyaResearch / bripipetools

Clean up epigenetic tools #123