stuart-lab / signac

R toolkit for the analysis of single-cell chromatin data
https://stuartlab.org/signac/
Other
327 stars 88 forks source link

Warning message when using AddMotifs #729

Closed neurocycology closed 3 years ago

neurocycology commented 3 years ago

Hello, since using output from the GRCh38 reference build for cellranger-atac 2.0.0 (refdata-cellranger-arc-GRCh38-2020-A-2.0.0), I am having issues related to scaffold annotation, specifically during the "Creating Motif object" stage of the AddMotifs code block. You'll see at the end of the code that I've tried to remove scaffold peaks as you described in #486. This allows the AddMotifs function to run past the "Building motif matrix" step before it emits the warnings I show at the bottom. Have I missed an easy step during annotation here? Thanks in advance for any help.

Minimal reproducible code, following the merge vignette:

library(Seurat)
library(Signac)
library(GenomeInfoDb)
library(EnsDb.Hsapiens.v86)
library(GenomicRanges)
library(BSgenome.Hsapiens.UCSC.hg38)
library(JASPAR2020)
library(motifmatchr)
library(TFBSTools)
library(ggplot2)
library(patchwork)
set.seed(1234)

# Read each sample's peak calls; the three columns are named chr/start/end so
# that makeGRangesFromDataFrame() can pick them up by name.
peaks1 <- read.table(
  file = "sample1/peaks.bed",
  col.names = c("chr", "start", "end")
)

peaks2 <- read.table(
  file = "sample2/peaks.bed",
  col.names = c("chr", "start", "end")
)

# Convert the peak tables to GRanges so they can be combined as intervals.
gr.1 <- makeGRangesFromDataFrame(peaks1)
gr.2 <- makeGRangesFromDataFrame(peaks2)

# Build the shared peak set from the GRanges objects. Passing the raw data
# frames (peaks1, peaks2) to c() would produce a plain list, which reduce()
# cannot merge into a set of non-overlapping ranges.
combined.peaks <- reduce(x = c(gr.1, gr.2))

# Keep only peaks whose width is strictly between 20 and 10000.
peakwidths <- width(combined.peaks)
combined.peaks <- combined.peaks[peakwidths < 10000 & peakwidths > 20]
combined.peaks

# Load a per-cell metadata CSV, drop its first data row, and keep the rows
# whose passed_filters value is above 500.
# NOTE(review): the first row is presumably the non-cell summary row that
# cellranger-atac writes to singlecell.csv -- confirm against the file.
load.cell.metadata <- function(path) {
  md <- read.table(
    file = path,
    sep = ",",
    header = TRUE,
    row.names = 1,
    stringsAsFactors = FALSE
  )
  md <- md[-1, ]
  md[md$passed_filters > 500, ]
}

md.1 <- load.cell.metadata("/sample1/singlecell.csv")
md.2 <- load.cell.metadata("/sample2/singlecell.csv")

# Create one fragment object per sample, restricted to the barcodes that
# survived the metadata filter for that same sample.
frags.1 <- CreateFragmentObject(
  path = "/sample1/fragments.tsv.gz",
  cells = rownames(md.1)
)

# The fragment file must come from sample2 to match the sample2 barcodes in
# md.2; pointing this at the sample1 file pairs the wrong fragments with
# these cells.
frags.2 <- CreateFragmentObject(
  path = "/sample2/fragments.tsv.gz",
  cells = rownames(md.2)
)

# Quantify both samples over the shared peak set so their feature rows match.
counts.1 <- FeatureMatrix(fragments = frags.1, features = combined.peaks, cells = rownames(md.1))
counts.2 <- FeatureMatrix(fragments = frags.2, features = combined.peaks, cells = rownames(md.2))

# Wrap each count matrix, together with its fragment object, in a chromatin assay.
sample1_assay <- CreateChromatinAssay(counts = counts.1, fragments = frags.1)
sample2_assay <- CreateChromatinAssay(counts = counts.2, fragments = frags.2)

# Build one Seurat object per sample, carrying the filtered cell metadata.
sample1_ATAC <- CreateSeuratObject(
  counts = sample1_assay,
  assay = "ATAC",
  meta.data = md.1,
  project = "sample1"
)
sample2_ATAC <- CreateSeuratObject(
  counts = sample2_assay,
  assay = "ATAC",
  meta.data = md.2,
  project = "sample2"
)

# Record the sample of origin in a "sample" metadata column.
sample1_ATAC$sample <- "sample1"
sample2_ATAC$sample <- "sample2"

# Merge the two samples; cell names are prefixed with "s1"/"s2" via add.cell.ids.
combined <- merge(
  x = sample1_ATAC,
  y = sample2_ATAC,
  add.cell.ids = c("s1", "s2")
)

# TF-IDF, top-feature selection and SVD on the merged object, followed by a
# UMAP on the "lsi" reduction using dimensions 2 to 50 (the first is skipped).
combined <- RunTFIDF(object = combined)
combined <- FindTopFeatures(object = combined, min.cutoff = 20)
combined <- RunSVD(object = combined)
combined <- RunUMAP(object = combined, dims = 2:50, reduction = "lsi")

# Attach gene annotations from EnsDb, switched to UCSC-style sequence names
# and labelled as hg38.
annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)
seqlevelsStyle(annotations) <- "UCSC"
genome(annotations) <- "hg38"
Annotation(combined) <- annotations

# Drop peaks that are not on the standard chromosomes of the hg38 BSgenome
# (i.e. scaffold peaks) before motif scanning.
main.chroms <- standardChromosomes(BSgenome.Hsapiens.UCSC.hg38)
peak.chroms <- seqnames(granges(combined))
keep.peaks <- as.logical(peak.chroms %in% main.chroms)
combined <- combined[keep.peaks, ]

# Fetch the JASPAR2020 matrices for species 9606 (latest versions only) and
# add motif information to the object.
pfm <- getMatrixSet(
  x = JASPAR2020,
  opts = list(species = 9606, all_versions = FALSE)
)
combined <- AddMotifs(
  combined,
  genome = BSgenome.Hsapiens.UCSC.hg38,
  pfm = pfm
)

Error:

Building motif matrix
Finding motif positions
Creating Motif object
Warning messages:
1: In .Seqinfo.mergexy(x, y) :
  Each of the 2 combined objects has sequence levels not in the other:
  - in 'x': chrM, chr1_GL383518v1_alt, chr1_GL383519v1_alt, chr1_GL383520v2_alt, chr1_KI270759v1_alt, chr1_KI270760v1_alt, chr1_KI270761v1_alt, chr1_KI270762v1_alt, chr1_KI270763v1_alt, chr1_KI270764v1_alt, chr1_KI270765v1_alt, chr1_KI270766v1_alt, chr1_KI270892v1_alt, chr2_GL383521v1_alt, chr2_GL383522v1_alt, chr2_GL582966v2_alt, chr2_KI270767v1_alt, chr2_KI270768v1_alt, chr2_KI270769v1_alt, chr2_KI270770v1_alt, chr2_KI270771v1_alt, chr2_KI270772v1_alt, chr2_KI270773v1_alt, chr2_KI270774v1_alt, chr2_KI270775v1_alt, chr2_KI270776v1_alt, chr2_KI270893v1_alt, chr2_KI270894v1_alt, chr3_GL383526v1_alt, chr3_JH636055v2_alt, chr3_KI270777v1_alt, chr3_KI270778v1_alt, chr3_KI270779v1_alt, chr3_KI270780v1_alt, chr3_KI270781v1_alt, chr3_KI270782v1_alt, chr3_KI270783v1_alt, chr3_KI270784v1_alt, chr3_KI270895v1_alt, chr3_KI270924v1_alt, chr3_KI270934v1_alt, chr3_KI270935v1_alt, chr3_KI270936v1_alt, chr3_KI270937v1_alt, chr4_GL000 [... truncated]
2: In .Seqinfo.mergexy(x, y) :
  Each of the 2 combined objects has sequence levels not in the other:
  - in 'x': chrM, chr1_GL383518v1_alt, chr1_GL383519v1_alt, chr1_GL383520v2_alt, chr1_KI270759v1_alt, chr1_KI270760v1_alt, chr1_KI270761v1_alt, chr1_KI270762v1_alt, chr1_KI270763v1_alt, chr1_KI270764v1_alt, chr1_KI270765v1_alt, chr1_KI270766v1_alt, chr1_KI270892v1_alt, chr2_GL383521v1_alt, chr2_GL383522v1_alt, chr2_GL582966v2_alt, chr2_KI270767v1_alt, chr2_KI270768v1_alt, chr2_KI270769v1_alt, chr2_KI270770v1_alt, chr2_KI270771v1_alt, chr2_KI270772v1_alt, chr2_KI270773v1_alt, chr2_KI270774v1_alt, chr2_KI270775v1_alt, chr2_KI270776v1_alt, chr2_KI270893v1_alt, chr2_KI270894v1_alt, chr3_GL383526v1_alt, chr3_JH636055v2_alt, chr3_KI270777v1_alt, chr3_KI270778v1_alt, chr3_KI270779v1_alt, chr3_KI270780v1_alt, chr3_KI270781v1_alt, chr3_KI270782v1_alt, chr3_KI270783v1_alt, chr3_KI270784v1_alt, chr3_KI270895v1_alt, chr3_KI270924v1_alt, chr3_KI270934v1_alt, chr3_KI270935v1_alt, chr3_KI270936v1_alt, chr3_KI270937v1_alt, chr4_GL000 [... truncated]
3: In .Seqinfo.mergexy(x, y) :
  Each of the 2 combined objects has sequence levels not in the other:
  - in 'x': chrM, chr1_GL383518v1_alt, chr1_GL383519v1_alt, chr1_GL383520v2_alt, chr1_KI270759v1_alt, chr1_KI270760v1_alt, chr1_KI270761v1_alt, chr1_KI270762v1_alt, chr1_KI270763v1_alt, chr1_KI270764v1_alt, chr1_KI270765v1_alt, chr1_KI270766v1_alt, chr1_KI270892v1_alt, chr2_GL383521v1_alt, chr2_GL383522v1_alt, chr2_GL582966v2_alt, chr2_KI270767v1_alt, chr2_KI270768v1_alt, chr2_KI270769v1_alt, chr2_KI270770v1_alt, chr2_KI270771v1_alt, chr2_KI270772v1_alt, chr2_KI270773v1_alt, chr2_KI270774v1_alt, chr2_KI270775v1_alt, chr2_KI270776v1_alt, chr2_KI270893v1_alt, chr2_KI270894v1_alt, chr3_GL383526v1_alt, chr3_JH636055v2_alt, chr3_KI270777v1_alt, chr3_KI270778v1_alt, chr3_KI270779v1_alt, chr3_KI270780v1_alt, chr3_KI270781v1_alt, chr3_KI270782v1_alt, chr3_KI270783v1_alt, chr3_KI270784v1_alt, chr3_KI270895v1_alt, chr3_KI270924v1_alt, chr3_KI270934v1_alt, chr3_KI270935v1_alt, chr3_KI270936v1_alt, chr3_KI270937v1_alt, chr4_GL000 [... truncated]
neurocycology commented 3 years ago

Closing this. It appears you can just ignore this warning after removing the scaffolds as per the linked post from @timoast described above.