Closed ParthaZain closed 3 years ago
Sorry @ParthaZain for the long delay... This has always been functional if you supply the GWAS based matches file to ArchR. However, here is a walkthrough for simplicity showing ArchR = gchromVAR
#New Release Branch
devtools::install_github("GreenleafLab/ArchR", ref="release_1.0.0", repos = BiocManager::repositories())
#######
devtools::install_github("caleblareau/gchromVAR")
library(chromVAR)
library(gchromVAR)
library(SummarizedExperiment)
library(data.table)
library(BiocParallel)
library(BSgenome.Hsapiens.UCSC.hg19)
set.seed(123)
register(MulticoreParam(2))
#######################
# New Result
#######################
library(ArchR)
proj <- getTestProject()
#Get Peak Matrix
se <- getMatrixFromProject(proj, "PeakMatrix")
rownames(se) <- paste0(seqnames(se), "_", start(se), "_",end(se))
peaks <- getPeakSet(proj)
names(peaks) <- paste0(seqnames(peaks), "_", start(peaks), "_",end(peaks))
mcols(se)$bias <- peaks[rownames(se)]$GC
#Get Background Peaks
proj <- addBgdPeaks(proj)
bgdPeaks <- getBgdPeaks(proj)
rownames(bgdPeaks) <- paste0(seqnames(bgdPeaks), "_", start(bgdPeaks), "_",end(bgdPeaks))
bgdPeaks <- bgdPeaks[rownames(se),]
#Get Bed GWAS using gChromVAR
files <- list.files(system.file('extdata/paper/PP001',package='gchromVAR'),
full.names = TRUE, pattern = "*.bed$")
ukbb <- gchromVAR::importBedScore(rowRanges(se), files, colidx = 5)
################
# gChromVAR
################
#Weighted Deviations
names(assays(se)) <- "counts"
ukbb_wDEV <- computeWeightedDeviations(se, ukbb, background_peaks = assay(bgdPeaks))
zdf <- reshape2::melt(t(assays(ukbb_wDEV)[["z"]]))
zdf[,2] <- gsub("_PP001", "", zdf[,2])
colnames(zdf) <- c("ct", "tr", "Zscore")
head(zdf, 25)
# ct tr Zscore
# 1 PBMCSmall#TATCTGTAGACAGCTG-1 BASO_COUNT -0.30860295
# 2 PBMCSmall#ATGGATCCAGGCAAGT-1 BASO_COUNT -0.39312881
# 3 PBMCSmall#CACATGAAGGCCTAAG-1 BASO_COUNT -0.30409017
# 4 PBMCSmall#CAAAGCTGTCTGATTG-1 BASO_COUNT -0.13592679
# 5 PBMCSmall#CTCATGCAGCAGTAGC-1 BASO_COUNT -0.54970951
# 6 PBMCSmall#CTGAATGAGCAGAATT-1 BASO_COUNT -0.41639709
# 7 PBMCSmall#TCAGGTAAGAGCAGCT-1 BASO_COUNT -0.66608152
# 8 PBMCSmall#CCCAGAGTCTTTATCG-1 BASO_COUNT -0.43582579
# 9 PBMCSmall#AGGCCCAAGTCTGCTA-1 BASO_COUNT 0.25856140
# 10 PBMCSmall#ATTTGTCCAATGCCAT-1 BASO_COUNT -0.22555038
# 11 PBMCSmall#GTCACGGAGCTCGGCT-1 BASO_COUNT -0.26471028
# 12 PBMCSmall#CGCAGGTAGGAGTCTG-1 BASO_COUNT -0.17128822
# 13 PBMCSmall#CCTGCTACAATGGCAG-1 BASO_COUNT -0.27680395
# 14 PBMCSmall#TTACGGAAGAGGTCCA-1 BASO_COUNT -0.47225230
# 15 PBMCSmall#GCACGGTCAACACGGA-1 BASO_COUNT -0.68415552
# 16 PBMCSmall#GTCACTCAGCTCCATA-1 BASO_COUNT 0.15649160
# 17 PBMCSmall#AGTTTGGAGCAATGTA-1 BASO_COUNT -0.37026848
# 18 PBMCSmall#ACGTTAGGTGCACTTA-1 BASO_COUNT -0.27937833
# 19 PBMCSmall#TAATCGGGTTCTCGAA-1 BASO_COUNT -0.34514003
# 20 PBMCSmall#TTGTTGTTCGACTATG-1 BASO_COUNT -0.12945685
# 21 PBMCSmall#AAATGAGCAGATGGCA-1 BASO_COUNT -0.26072312
# 22 PBMCSmall#GGTAGGAAGTAGTGTA-1 BASO_COUNT -0.04414560
# 23 PBMCSmall#TTACGGACATCGCCTT-1 BASO_COUNT -0.04297001
# 24 PBMCSmall#CACATGATCCTGGAAT-1 BASO_COUNT -0.45827856
# 25 PBMCSmall#CATGCCTCATTTGTTC-1 BASO_COUNT -0.29474687
################
# ArchR
################
#Compute With ArchR
proj <- addDeviationsMatrix(proj, matches = ukbb, matrixName = "UKBBMatrix")
seUK <- getMatrixFromProject(proj, "UKBBMatrix")
zdf2 <- reshape2::melt(t(as.matrix(assays(seUK)[["z"]])))
zdf2[,2] <- gsub("_PP001", "", zdf2[,2])
colnames(zdf2) <- c("ct", "tr", "Zscore")
head(zdf2, 25)
# ct tr Zscore
# 1 PBMCSmall#TATCTGTAGACAGCTG-1 BASO_COUNT -0.30860295
# 2 PBMCSmall#ATGGATCCAGGCAAGT-1 BASO_COUNT -0.39312881
# 3 PBMCSmall#CACATGAAGGCCTAAG-1 BASO_COUNT -0.30409017
# 4 PBMCSmall#CAAAGCTGTCTGATTG-1 BASO_COUNT -0.13592679
# 5 PBMCSmall#CTCATGCAGCAGTAGC-1 BASO_COUNT -0.54970951
# 6 PBMCSmall#CTGAATGAGCAGAATT-1 BASO_COUNT -0.41639709
# 7 PBMCSmall#TCAGGTAAGAGCAGCT-1 BASO_COUNT -0.66608152
# 8 PBMCSmall#CCCAGAGTCTTTATCG-1 BASO_COUNT -0.43582579
# 9 PBMCSmall#AGGCCCAAGTCTGCTA-1 BASO_COUNT 0.25856140
# 10 PBMCSmall#ATTTGTCCAATGCCAT-1 BASO_COUNT -0.22555038
# 11 PBMCSmall#GTCACGGAGCTCGGCT-1 BASO_COUNT -0.26471028
# 12 PBMCSmall#CGCAGGTAGGAGTCTG-1 BASO_COUNT -0.17128822
# 13 PBMCSmall#CCTGCTACAATGGCAG-1 BASO_COUNT -0.27680395
# 14 PBMCSmall#TTACGGAAGAGGTCCA-1 BASO_COUNT -0.47225230
# 15 PBMCSmall#GCACGGTCAACACGGA-1 BASO_COUNT -0.68415552
# 16 PBMCSmall#GTCACTCAGCTCCATA-1 BASO_COUNT 0.15649160
# 17 PBMCSmall#AGTTTGGAGCAATGTA-1 BASO_COUNT -0.37026848
# 18 PBMCSmall#ACGTTAGGTGCACTTA-1 BASO_COUNT -0.27937833
# 19 PBMCSmall#TAATCGGGTTCTCGAA-1 BASO_COUNT -0.34514003
# 20 PBMCSmall#TTGTTGTTCGACTATG-1 BASO_COUNT -0.12945685
# 21 PBMCSmall#AAATGAGCAGATGGCA-1 BASO_COUNT -0.26072312
# 22 PBMCSmall#GGTAGGAAGTAGTGTA-1 BASO_COUNT -0.04414560
# 23 PBMCSmall#TTACGGACATCGCCTT-1 BASO_COUNT -0.04297001
# 24 PBMCSmall#CACATGATCCTGGAAT-1 BASO_COUNT -0.45827856
# 25 PBMCSmall#CATGCCTCATTTGTTC-1 BASO_COUNT -0.29474687
################
# Test Equivalent
################
identical(round(zdf2[,3], 5), round(zdf[,3], 5))
#[1] TRUE
Best
Jeff
I am closing this issue. Feel free to open new ones!
Hi all, thank you for an amazing, easy to use and well-documented package that allows relative bioinformatics newbies to start doing quite sophisticated analyses.
We are a GWAS lab and would be interested in looking at cell type enrichment for our association signals. We routinely do this with bulk ATAC-Seq datasets but would be interested in reproducing the kind of analysis in figure 6 of this paper (https://www.nature.com/articles/s41588-019-0362-6) but for much larger scATAC-Seq datasets (eg the hematopoiesis dataset produced by your lab in Granja et al 2019).
Are there any plans to add g-chromVar on top of your existing chromVar implementation? Would it be straightforward to just do this on my own without you having to add a specific function to the package?