title: CSCpromoters tags:
The goal of CSCpromoters is to extract the sequences of promoter regions from the genome fasta files.
CSCpromoters
allows users to extract the sequences of promoter regions
from the genome fasta files. CSCpromoters
can calculate the length of
promoters based on the distance to the closest upstream gene.
CSCpromoters
also provides the option for users to enter the
user-defined minimum and maximum promoter length.
You can install the development version of CSCpromoters
from
GitHub with:
# install.packages("devtools")
devtools::install_github("thiagomaf/CSCpromoters")
# These commands will not work while the repositories are private.
# There are work-around, we can talk about it later.
Alternatively, you can download this git repository and install it
locally. To do that, download the content of this repository by clicking
the big green Code
button (on the top right) and then click
Download ZIP
. Download it to a local folder.
After that, run the following code line:
devtools::install_local("PATH/CSCpromoters-main.zip")
This is a basic example which shows you how to solve a common problem:
library(magrittr)
library(ggplot2)
library(CSCpromoters)
# MIN-YAO, set eval=TRUE and comment the next block to run on your computer
gff_folder <- paste0(
"./genome_barley_GP/" # user defined path
)
fasta_list <- c(
"chr1H" = "./genome_barley_GP/Hordeum_vulgare.refseq[chr1H].fasta",
"chr2H" = "./genome_barley_GP/Hordeum_vulgare.refseq[chr2H].fasta",
"chr3H" = "./genome_barley_GP/Hordeum_vulgare.refseq[chr3H].fasta",
"chr4H" = "./genome_barley_GP/Hordeum_vulgare.refseq[chr4H].fasta",
"chr5H" = "./genome_barley_GP/Hordeum_vulgare.refseq[chr5H].fasta",
"chr6H" = "./genome_barley_GP/Hordeum_vulgare.refseq[chr6H].fasta",
"chr7H" = "./genome_barley_GP/Hordeum_vulgare.refseq[chr7H].fasta",
"chrUn" = "./genome_barley_GP/Hordeum_vulgare.refseq[chrUn].fasta"
)
txdb <- paste0(
gff_folder,
"Horvul_GP_v1r1_Apollo_30_06_20_named_product_GO.gff3"
) %>%
make_txdb(
.data_source = "Hv - Golden Promise",
.organism = "Hordeum vulgare"
)
## Import genomic features from the file as a GRanges object ... OK
## Prepare the 'metadata' data frame ... OK
## Make the TxDb object ... OK
# For some reason this cannot be properly loaded from an .RData or .rda file,
# must be run on every new R session
annotations <- txdb %>%
get_txdb_annotation()
annotations %>%
# Wrap-up function
get_promoters(
.txdb = txdb,
.FASTA_list = fasta_list,
.keep = c( # user defined gene list
"chr1Hg0040651",
"chr3Hg0328761",
"chr3Hg0345511",
"chr4Hg0383761",
"chr4Hg0380431",
"chr4Hg0417171",
"chr4Hg0421931",
"chr5Hg0547031",
"chr6Hg0657981",
"chr7Hg0679581"
)
)
## DNAStringSet object of length 8:
## width seq names
## [1] 2000 GTAACAATAGTAACAAGGTGCAC...CCAGCCGTACAGACGATATTTCA chr1Hg0040651
## [2] 2000 AACTAATCTGTGGTTGGATGACT...ATTGTTCCGGCACGGGGCTGGGG chr3Hg0328761
## [3] 2000 ACATAGAAAGTATGCACATGACA...CCTCCCTCCCTCCCTCCCCCCAA chr3Hg0345511
## [4] 2000 TGCATTTGACACATCAGATTTGG...ATGGCGCGGCTACGTCTGCTACG chr4Hg0380431
## [5] 2000 TTTAATGCAATGTATGAATATGA...AAGACTGCATAAAGTTTGGATCA chr4Hg0383761
## [6] 2000 GAGAGCCTTTGGTCAGCAAAGAT...GCTCTCGATCGCATGGAAGAAAA chr4Hg0417171
## [7] 1698 GACATATATGTGTCTCATAATGA...TACCCGCGCTACTATTACAGGAA chr5Hg0547031
## [8] 2000 GACGGACGGATGGATCATGGATG...TGTGAGCCTGAGATGCAGGGGAA chr6Hg0657981
promoter_sizes <- annotations %>%
# Choose which loci to use
filter_locus(
.keep = c(
"chr1Hg0040651",
"chr3Hg0328761",
"chr3Hg0345511",
"chr4Hg0383761",
"chr4Hg0380431",
"chr4Hg0417171",
"chr4Hg0421931",
"chr5Hg0547031",
"chr6Hg0657981",
"chr7Hg0679581"
)
) %>%
# Calculate distances to closest upstream locus
get_promoter_distances() %>%
# Trim found upstream distances and define promoter lengths
set_promoter_sizes(.min_size = 100, .max_size = 2000)
promoter_sizes %>%
plot_promoter_maps(annotations)
promoter_sizes %>%
get_promoter_sequences(.txdb = txdb, .FASTA_list = fasta_list) %>%
write_fasta("data/promoters_Min-Yao.fasta")
[gff_folder]
/Annotation_Golden_Promise_v1r1_Apollo_300620_mRNA.fasta
[gff_folder]
/Horvul_GP_v1r1_Apollo_30_06_20_named_product_GO.gff3[fasta_list]
/Hordeum_vulgare.refseq[chr1H].fasta[fasta_list]
/Hordeum_vulgare.refseq[chr2H].fasta[fasta_list]
/Hordeum_vulgare.refseq[chr3H].fasta[fasta_list]
/Hordeum_vulgare.refseq[chr4H].fasta[fasta_list]
/Hordeum_vulgare.refseq[chr5H].fasta[fasta_list]
/Hordeum_vulgare.refseq[chr6H].fasta[fasta_list]
/Hordeum_vulgare.refseq[chr7H].fasta[fasta_list]
/Hordeum_vulgare.refseq[chrUn].fasta