Closed elhaam closed 1 year ago
Follow-up: I have tried the following code to generate the mouse (mm10) gene and promoter BED files. Would you mind checking whether this is the right approach for producing these files? Thank you in advance!
if (!require("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("TxDb.Mmusculus.UCSC.mm10.knownGene")
BiocManager::install("EnsDb.Mmusculus.v79")
BiocManager::install("org.Mm.eg.db")
library(stringr)
library(magrittr)
library(Matrix)
library(tidyverse)
library(dplyr)
library(GenomeInfoDb)
library(TxDb.Mmusculus.UCSC.mm10.knownGene)
library(EnsDb.Mmusculus.v79)
library(org.Mm.eg.db)
# Build mouse (mm10) gene, TSS and promoter BED files.
#
# Outputs (all tab-separated, no header, BED6 column order
# chrom / start / end / name / score / strand, 0-based half-open coordinates):
#   proms_mm10/mm10_genes.bed     protein-coding gene bodies (Ensembl v79)
#   proms_mm10/mm10_tss.bed       one 1-bp TSS per gene symbol (UCSC knownGene)
#   proms_mm10/mm10_promoter.bed  3 kb upstream of each retained TSS

# showWarnings = FALSE so re-running the script does not warn when the
# directory already exists.
dir.create("proms_mm10", showWarnings = FALSE)

# Convert a data frame with GRanges-style columns (seqnames, start, end,
# strand, symbol; 1-based closed coordinates) into BED6 layout. Integer
# arithmetic is used on purpose: double-valued coordinates can be written by
# write.table() in scientific notation (e.g. 1e+05), which breaks BED parsers.
as_bed6 <- function(ranges_df) {
  data.frame(
    chrom = as.character(ranges_df$seqnames),
    # 1-based start -> 0-based start; clamp so a promoter that runs off the
    # beginning of a chromosome never yields a negative coordinate.
    start = pmax(as.integer(ranges_df$start) - 1L, 0L),
    end = as.integer(ranges_df$end),
    name = ranges_df$symbol,
    score = ".",
    strand = as.character(ranges_df$strand)
  )
}

# ---- Gene bodies ----------------------------------------------------------
gene.coords <- genes(EnsDb.Mmusculus.v79, filter = ~ gene_biotype == "protein_coding")
df <- data.frame(
  seqnames = seqnames(gene.coords),
  starts = start(gene.coords) - 1L, # integer subtraction keeps the column integer
  ends = end(gene.coords),
  names = elementMetadata(gene.coords)$symbol,
  scores = ".",
  strands = strand(gene.coords)
)
dff <- df %>% arrange(seqnames, starts, ends)
# Ensembl names chromosomes "1", "2", ..., "MT"; UCSC mm10 uses "chr1", ...,
# "chrM", so the mitochondrial contig needs an explicit rename after prefixing.
dff$seqnames <- paste0("chr", dff$seqnames)
dff$seqnames[dff$seqnames == "chrMT"] <- "chrM"
write.table(dff, "proms_mm10/mm10_genes.bed",
  sep = "\t", col.names = FALSE, row.names = FALSE, quote = FALSE
)

# ---- Transcription start sites -------------------------------------------
# Use the MOUSE transcript database (the human hg38 TxDb would silently give
# human coordinates in files labelled mm10).
TSS <- unique(resize(
  GenomicFeatures::transcripts(TxDb.Mmusculus.UCSC.mm10.knownGene, columns = c("GENEID")),
  width = 1, fix = "start"
))
TSS <- GenomeInfoDb::keepStandardChromosomes(TSS, pruning.mode = "coarse")

# Keep only transcripts annotated with a numeric (Entrez) gene ID. The IDs are
# kept as character throughout: a numeric round-trip can render an ID such as
# 100000 as "1e+05", which would then fail the symbol lookup.
# NOTE(review): assumes each transcript carries at most one GENEID, so the
# list column coerces to a plain character vector with NA for missing IDs.
gene_ids <- as.character(mcols(TSS)$GENEID)
has_entrez <- !is.na(gene_ids) & grepl("^[0-9]+$", gene_ids)
TSS <- TSS[has_entrez]
mcols(TSS)$GENEID <- gene_ids[has_entrez]

# Map Entrez IDs to gene symbols with the MOUSE organism database.
mcols(TSS)$symbol <- unname(AnnotationDbi::mapIds(org.Mm.eg.db,
  keys = mcols(TSS)$GENEID,
  column = "SYMBOL",
  keytype = "ENTREZID",
  multiVals = "first"
))
TSS <- TSS[!is.na(mcols(TSS)$symbol)]
# Restrict to the protein-coding genes written to the gene BED file.
TSS <- TSS[mcols(TSS)$symbol %in% dff$names]

df_TSS <- tibble::as_tibble(as.data.frame(TSS))
df_TSS <- df_TSS %>% arrange(seqnames, start, end)
# One TSS per symbol: the first in genomic order is kept.
# NOTE(review): for minus-strand genes this is the lowest-coordinate TSS, not
# the most upstream one — confirm this is the intended representative.
df_TSS <- df_TSS[!duplicated(df_TSS$symbol), ]
write.table(as_bed6(df_TSS), "proms_mm10/mm10_tss.bed",
  sep = "\t", col.names = FALSE, row.names = FALSE, quote = FALSE
)

# ---- Promoters ------------------------------------------------------------
# 3 kb upstream of each retained TSS (strand-aware via promoters()).
gr_promoter <- makeGRangesFromDataFrame(df_TSS, keep.extra.columns = TRUE)
df_promoter <- as_bed6(
  as.data.frame(promoters(gr_promoter, upstream = 3000, downstream = 0))
)
write.table(df_promoter, "proms_mm10/mm10_promoter.bed",
  sep = "\t", col.names = FALSE, row.names = FALSE, quote = FALSE
)
Hello LIGER team,
Thank you very much for the useful package!
I was wondering whether you have any instructions for creating the gene and promoter BED files for mouse. I can see instructions for human data (https://www.biostars.org/p/427912/ and https://www.biostars.org/p/8207/), but I am not sure what the best approach is for creating the equivalent mouse files for two Mus musculus SNARE-seq datasets. If you already have these two BED files, would you mind sharing links to them? My goal is to use these files to integrate RNA-seq and ATAC-seq data using LIGER.
Thanks!