aryeelab / diffloop-paper

Sample Analysis for diffloop paper
0 stars 0 forks source link

Diffloop Visualization of interactions #3

Open koushik20 opened 2 years ago

koushik20 commented 2 years ago

Hi,

First, thank you for this wonderful package and documentation.

I am working on finding a differential loop calling for my 4 samples (8 reps) data using diffloop and below is my loopobject `> dim(km_anno)

anchors interactions samples colData rowData 1 49597 96655 8 2 7`

but when I do visualization of the interaction by using plotTopLoops() or loopPlot() as I understand giving me an aggregate of interactions. Attached is an example pic for reference. Is there any other way to view each and every interaction rather than aggregate interactions that make this plot look almost the same across the cell lines? Thank you!

Screen Shot 2022-04-19 at 3 37 24 PM
caleblareau commented 2 years ago

Hi and thanks for the nice words. I’m not sure that aggregate is quite accurate— it should plot everything in that region. Diffloop calls the plotBedpe function in sushi, that I don’t think does any filtering. Can you try zooming into a region and seeing if you see more loops perhaps?

One thing to note is that we square root transform the thickness and height, so loops that are more different may appear more similar.

Caleb

On Apr 19, 2022, at 3:58 PM, Koushik Ayaluri @.**@.>> wrote:

Hi,

First, thank you for this wonderful package and documentation.

I am working on finding a differential loop calling for my 4 samples (8 reps) data using diffloop and below is my loopobject `> dim(km_anno)

anchors interactions samples colData rowData 1 49597 96655 8 2 7`

but when I do visualization of the interaction by using plotTopLoops() or loopPlot() as I understand giving me an aggregate of interactions. Attached is an example pic for reference. Is there any other way to view each and every interaction rather than aggregate interactions that make this plot look almost the same across the cell lines? Thank you! [Screen Shot 2022-04-19 at 3 37 24 PM]https://user-images.githubusercontent.com/57368663/164114453-6ff55ebc-a3b2-4d52-8dfc-f041eb7061ce.png

— Reply to this email directly, view it on GitHubhttps://github.com/aryeelab/diffloop-paper/issues/3, or unsubscribehttps://github.com/notifications/unsubscribe-auth/AD32FYMRN5ZTBVNEAVAGRDLVF43ATANCNFSM5T2IWPWA. You are receiving this because you are subscribed to this thread.Message ID: @.***>

koushik20 commented 2 years ago

I didn't find any difference while zooming in on the plots. But thanks for the claratification on how the loops are plotted.

koushik20 commented 2 years ago

This is the code I am using to generate the loops. Am I doing something wrong?


## loading packages
library(diffloop)
library(diffloopdata)
library(ggplot2)
library(GenomicRanges)
library(ggrepel)
library(DESeq2)
library(dplyr)

## diffloop downstream analysis with bedpe files
bed_dir <- file.path("/Users/ayalurik/Documents/HiChIP_diffloop/diffloop")
bed_dir

samples <- c("kura_1","kura_2","uwb_1","uwb_2",
             "ft246_1","ft246_2","ft33_1","ft33_2")
full <- loopsMake(bed_dir, samples)
celltypes <- c("kura", "kura", "uwb", "uwb", "ft246", "ft246", "ft33", "ft33")
full <- updateLDGroups(full, celltypes)
head(full, 4)
dim(full)

## Quality control
# remove and loops that merged together from import
full1 <- subsetLoops(full, full@rowData$loopWidth >= 5000)
dim(full1)

# Filtering loops that are FDR > 0.01
noCNV_corrected <- mangoCorrection(full1, FDR = 0.01)
dim(noCNV_corrected)

# filtering loops that are present strongly (>= 5 PETs) in one replicate
# but absent (== 0 PETs) in the other replicate
cm <- noCNV_corrected@counts
k_dis <- ((cm[,1]>=5 &cm[,2]==0)|(cm[,2]>=5&cm[,1]==0))
m_dis <- ((cm[,3]>=5 &cm[,4]==0)|(cm[,4]>=5&cm[,3]==0))
qc_filt <- subsetLoops(noCNV_corrected, !(k_dis | m_dis))
dim(qc_filt)

p1 <- loopDistancePlot(qc_filt)
p1

## counts the number of loops for each sample and returns whether they are single,
## self, unique or none
loopMetrics(qc_filt)

# PC plot
pcp1dat <- qc_filt
pcp1dat@colData$sizeFactor <- 1
samples <- c("kura_1", "kura_2", "uwb_1", "uwb_2",
             "ft246_1", "ft246_2", "ft33_1", "ft33_2")
pcp1 <- pcaPlot(pcp1dat) + geom_text_repel(aes(label=samples)) + 
  ggtitle("PC Plot with no
  Size Factor Correction") + theme(legend.position="none")
pcp1

samples <- c("kura_1", "kura_2", "uwb_1", "uwb_2",
             "ft246_1", "ft246_2", "ft33_1", "ft33_2")
pcp2 <- pcaPlot(qc_filt) + geom_text_repel(aes(label=samples)) + 
  ggtitle("PC Plot with Size Factor Correction") +
  theme(legend.position = "none")
pcp2

## Differential Loop Calling
km_filt <- qc_filt
dim(km_filt)

# First model using edgeR over-dispersed Poisson regression
km_res <- loopAssoc(km_filt, method = "edgeR", coef = 2)
head(km_res@rowData)

## Epigenetic Annotation
h3dir <- file.path("/Users/ayalurik/Documents/HiChIP_diffloop")
kura <- paste0(h3dir, "/", "kura_H3k27ac_hg19_narrowpeak.bed")
uwb <- paste0(h3dir, "/", "UWB_H3k27ac_hg19_narrowpeak.bed")
ft246 <- paste0(h3dir, "/", "FT33_H3k27ac_hg19_narrowpeak.bed")
ft33 <- paste0(h3dir, "/", "FT246_H3k27ac_hg19_narrowpeak.bed")

h3k27ac.k <- rmchr(padGRanges(bedToGRanges(kura), pad = 1000))
h3k27ac.u <- rmchr(padGRanges(bedToGRanges(uwb), pad = 1000))
h3k27ac.f2 <- rmchr(padGRanges(bedToGRanges(ft246), pad = 1000))
h3k27ac.f3 <- rmchr(padGRanges(bedToGRanges(ft33), pad = 1000))
ku <- GenomicRanges::union(h3k27ac.k, h3k27ac.u)
ft <- GenomicRanges::union(h3k27ac.f2, h3k27ac.f3)
enhancer <- GenomicRanges::union(ku,ft)
promoter <- padGRanges(getHumanTSS(), pad = 1000)
km_anno <- annotateLoops(km_res, enhancer = enhancer, promoter = promoter)

# No.of differential loops when FDR <= 0.05
diflop <- subsetLoops(km_res, km_res@rowData$FDR <= 0.05)

# exploring out results
summary <- as.data.frame(km_anno@anchors)
stats <- as.data.frame(km_anno@rowData)
interactions <- as.data.frame(km_anno@interactions)

## saving a rda file
# save(km_anno, file = "diffloop_Kura_UWB_vs_FT246_FT33.rda")

## Visulaization
chr20reg <- GRanges(seqnames = c("19"), 
                    ranges = IRanges(start = c(42347500),end = c(49542500)))
p3 <- loopPlot(km_anno, chr20reg)

plotTopLoops(km_anno, organism = "h", FDR = 0.01, n = 10)

manyLoopPlots(km_anno, chr20reg)

Thank you!

caleblareau commented 2 years ago

That all looks reasonable to me; can you run summary() of the loops object toward the end and then using base R or dplyr, filer for your region of interest? You can see the table which may tell you how many loops there are in the region of interest.

Caleb

On Apr 22, 2022, at 9:50 AM, Koushik Ayaluri @.**@.>> wrote:

This is the code I am using to generate the loops. Am I doing something wrong?

loading packages

library(diffloop) library(diffloopdata) library(ggplot2) library(GenomicRanges) library(ggrepel) library(DESeq2) library(dplyr)

diffloop downstream analysis with bedpe files

bed_dir <- file.path("/Users/ayalurik/Documents/HiChIP_diffloop/diffloop") bed_dir

samples <- c("kura_1","kura_2","uwb_1","uwb_2", "ft246_1","ft246_2","ft33_1","ft33_2") full <- loopsMake(bed_dir, samples) celltypes <- c("kura", "kura", "uwb", "uwb", "ft246", "ft246", "ft33", "ft33") full <- updateLDGroups(full, celltypes) head(full, 4) dim(full)

Quality control

remove and loops that merged together from import

full1 <- subsetLoops(full, @.***$loopWidth >= 5000) dim(full1)

Filtering loops that are FDR > 0.01

noCNV_corrected <- mangoCorrection(full1, FDR = 0.01) dim(noCNV_corrected)

filtering loops that are present strongly (>= 5 PETs) in one replicate

but absent (== 0 PETs) in the other replicate

cm <- @.*** k_dis <- ((cm[,1]>=5 &cm[,2]==0)|(cm[,2]>=5&cm[,1]==0)) m_dis <- ((cm[,3]>=5 &cm[,4]==0)|(cm[,4]>=5&cm[,3]==0)) qc_filt <- subsetLoops(noCNV_corrected, !(k_dis | m_dis)) dim(qc_filt)

p1 <- loopDistancePlot(qc_filt) p1

counts the number of loops for each sample and returns whether they are single,

self, unique or none

loopMetrics(qc_filt)

PC plot

pcp1dat <- qc_filt @.***$sizeFactor <- 1 samples <- c("kura_1", "kura_2", "uwb_1", "uwb_2", "ft246_1", "ft246_2", "ft33_1", "ft33_2") pcp1 <- pcaPlot(pcp1dat) + geom_text_repel(aes(label=samples)) + ggtitle("PC Plot with no Size Factor Correction") + theme(legend.position="none") pcp1

samples <- c("kura_1", "kura_2", "uwb_1", "uwb_2", "ft246_1", "ft246_2", "ft33_1", "ft33_2") pcp2 <- pcaPlot(qc_filt) + geom_text_repel(aes(label=samples)) + ggtitle("PC Plot with Size Factor Correction") + theme(legend.position = "none") pcp2

Differential Loop Calling

km_filt <- qc_filt dim(km_filt)

First model using edgeR over-dispersed Poisson regression

km_res <- loopAssoc(km_filt, method = "edgeR", coef = 2) @.***)

Epigenetic Annotation

h3dir <- file.path("/Users/ayalurik/Documents/HiChIP_diffloop") kura <- paste0(h3dir, "/", "kura_H3k27ac_hg19_narrowpeak.bed") uwb <- paste0(h3dir, "/", "UWB_H3k27ac_hg19_narrowpeak.bed") ft246 <- paste0(h3dir, "/", "FT33_H3k27ac_hg19_narrowpeak.bed") ft33 <- paste0(h3dir, "/", "FT246_H3k27ac_hg19_narrowpeak.bed")

h3k27ac.k <- rmchr(padGRanges(bedToGRanges(kura), pad = 1000)) h3k27ac.u <- rmchr(padGRanges(bedToGRanges(uwb), pad = 1000)) h3k27ac.f2 <- rmchr(padGRanges(bedToGRanges(ft246), pad = 1000)) h3k27ac.f3 <- rmchr(padGRanges(bedToGRanges(ft33), pad = 1000)) ku <- GenomicRanges::union(h3k27ac.k, h3k27ac.u) ft <- GenomicRanges::union(h3k27ac.f2, h3k27ac.f3) enhancer <- GenomicRanges::union(ku,ft) promoter <- padGRanges(getHumanTSS(), pad = 1000) km_anno <- annotateLoops(km_res, enhancer = enhancer, promoter = promoter)

No.of differential loops when FDR <= 0.05

diflop <- subsetLoops(km_res, @.***$FDR <= 0.05)

exploring out results

summary <- @.) stats <- @.) interactions <- @.***)

saving a rda file

save(km_anno, file = "diffloop_Kura_UWB_vs_FT246_FT33.rda")

Visulaization

chr20reg <- GRanges(seqnames = c("19"), ranges = IRanges(start = c(42347500),end = c(49542500))) p3 <- loopPlot(km_anno, chr20reg)

plotTopLoops(km_anno, organism = "h", FDR = 0.01, n = 10)

manyLoopPlots(km_anno, chr20reg)

Thank you!

— Reply to this email directly, view it on GitHubhttps://github.com/aryeelab/diffloop-paper/issues/3#issuecomment-1106690681, or unsubscribehttps://github.com/notifications/unsubscribe-auth/AD32FYKOLPOUSUAPLVLWIZLVGLKEBANCNFSM5T2IWPWA. You are receiving this because you commented.Message ID: @.***>

koushik20 commented 2 years ago

Thank you for your quick response. Sure will run and check.