shortRNA
projectThis is a project for the development of short RNA analysis R package. It was initiated by Pierre-Luc in 2017.
shortRNA
analysis steps in R
(a short vignette)install.packages("BiocManager")
BiocManager::install("remotes")
remotes::install_github("shortRNAhub/shortRNA")
library(shortRNA)
qc_SE(file = fq_files, outdir = "output/", ad1 = adapter_sequence)
m <- fastq2SeqCountMatrix(files = trimmed_fastq_files)
fa <- DNAStringSet(row.names(m))
names(fa) <- paste0("S", 1:length(fa))
writeXStringSet(fa, fasta_file)
db <- getDB()
a <- prepareAnnotation(
ensdb = db$ensdb,
output_dir = genomeDir,
extra.gr = list(piRNA = db$piRNA_GR, miRNA = db$miRNA_GR),
extra.seqs = list(rRNA = db$rRNA_fa, tRNA = db$tRNA_fa),
resolveSplicing = NULL,
rules = defaultAssignRules(),
tRNAEnsembleRemove = FALSE,
clusterMiRNA = TRUE
)
alignShortRNA(
fastq = "unique.fasta",
index = "genomeDir/customGenome",
outDir = "align", GTF = exonsBy(db$ensdb),
GTF.featureType = "exon", GTF.attrType = "gene_id"
)
o <- overlapWithTx2(
bamFile = align_file, annotation = a, uniqueFasta = "unique.fasta",
ignoreStrand = TRUE, nbthreads = 16
)
ar <- assignReads(sources = o, rules = defaultAssignRules())
FactorList
fl <- featuresAnnoToFL(a)
ar_tree <- addReadsToTree(
fL = fl,
mappedFeaturesDF = ar,
unassigned = FALSE,
extraTreeBranch = NULL
)
library(TreeSummarizedExperiment)
rt <- ar_tree
as <- list(counts = m)
cd <- DataFrame(samples = c(s1, s2), group - c("group1", "group2"))
tse <- TreeSummarizedExperiment(
assays = as,
rowTree = rt,
colData = cd,
rowData = ar[row.names(m), ],
metadata = list(
assignedReads = ar,
counts = m,
notAligned = getReadsFromBam(
bam = align_file
)
)
)
library(treeClimbR)
dea <- runDA(TSE = tse, filter_min_count = 20)
out <- nodeResult(object = dea, n = Inf)
cand <- getCand(
tree = dea$tree,
score_data = out, node_column = "node",
p_column = "PValue", sign_column = "logFC",
message = FALSE, threshold = 0.05
)
candB <- evalCand(
tree = dea$tree,
levels = cand$candidate_list,
score_data = out, node_column = "node",
p_column = "PValue", sign_column = "logFC",
method = "BH", limit_rej = 0.05,
use_pseudo_leaf = FALSE,
message = FALSE
)
result <- topNodes(object = candB, n = Inf, p_value = 0.05)