Closed sropri92 closed 4 months ago
Hi @sropri92, can you provide the full code you're running?
Yes Tim,
# Sample identifiers. Each one is also the name of the sub-directory of
# `data_dir` that holds that sample's files.
sample_names <- c(
  "BRCA1_E", "BRCA1_G_a", "BRCA1_G_b", "BRCA2_D",
  "WT_12", "WT_B", "WT_C", "WT_D"
)
# Parent directory of the per-sample folders
data_dir <- "C:/Users/sropr/OneDrive/Documents/scATAC/carmans"
# Read the peak calls of every sample. Files are addressed by their full path
# instead of via setwd(), so the working directory of the session is left
# untouched.
for (sample_name in sample_names) {
  peaks <- read.table(
    file = file.path(data_dir, sample_name, "peaks.bed"),
    col.names = c("chr", "start", "end")
  )
  # Store the peak table (a data frame) under a per-sample name,
  # e.g. `peaks_BRCA1_E`
  assign(paste0("peaks_", sample_name), peaks)
}
# Replace each per-sample peak table (`peaks_<sample>`) with its GRanges
# equivalent, keeping the same variable name.
for (sample_name in sample_names) {
  peak_var <- paste0("peaks_", sample_name)
  assign(peak_var, makeGRangesFromDataFrame(get(peak_var)))
}
# Concatenate the per-sample ranges (in `sample_names` order) and collapse
# overlapping ranges into one unified peak set to quantify in each dataset.
per_sample_peaks <- unname(mget(paste0("peaks_", sample_names)))
combined.peaks <- reduce(x = do.call(c, per_sample_peaks))
# Keep only peaks wider than 20 bp and narrower than 10,000 bp
peakwidths <- width(combined.peaks)
combined.peaks <- combined.peaks[peakwidths > 20 & peakwidths < 10000]
combined.peaks
# Parent directory of the per-sample folders
data_dir <- "C:/Users/sropr/OneDrive/Documents/scATAC/carmans"

# Gene annotations are the same for every sample, so they are built once
# here instead of once per loop iteration.
annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)
# change to UCSC style since the data was mapped to hg38
seqlevels(annotations) <- paste0("chr", seqlevels(annotations))
genome(annotations) <- "hg38"

# Build, annotate, QC-filter and LSI-reduce one Seurat object per sample.
# Each result is stored as `seurat_<sample_name>`.
for (sample_name in sample_names) {
  # Files are addressed by full path rather than via setwd(), so the
  # working directory of the session is not changed.
  sample_dir <- file.path(data_dir, sample_name)

  # Per-barcode metadata
  md <- read.table(
    file = file.path(sample_dir, "singlecell.csv"),
    stringsAsFactors = FALSE,
    sep = ",",
    header = TRUE,
    row.names = 1
  )[-1, ] # remove the first row
  # Ali: can adjust accordingly, might be better to ignore this and do the
  # filtering after getting the Violin Plot
  md <- md[md$passed_filters > 500, ]

  frags <- CreateFragmentObject(
    path = file.path(sample_dir, "fragments.tsv.gz"),
    cells = rownames(md)
  )
  # Quantify the shared peak set so all samples have identical features
  counts <- FeatureMatrix(
    fragments = frags,
    features = combined.peaks,
    cells = rownames(md)
  )
  assay <- CreateChromatinAssay(counts, fragments = frags)
  seurat <- CreateSeuratObject(
    assay,
    assay = "peaks",
    meta.data = md,
    project = sample_name
  )

  # add the gene information to the object
  Annotation(seurat) <- annotations
  # Values are not auto-printed inside a for loop, so print explicitly
  print(head(Annotation(seurat)))
  print(head(Fragments(seurat)[[1]]))

  # compute nucleosome signal score per cell
  seurat <- NucleosomeSignal(object = seurat)
  # compute TSS enrichment score per cell
  # It used to be fast=TRUE which did not allow me to build the enrichment
  # matrix
  seurat <- TSSEnrichment(object = seurat, fast = FALSE)
  # add fraction of reads in peaks
  seurat$pct_reads_in_peaks <-
    seurat$peak_region_fragments / seurat$passed_filters * 100
  # add blacklist fraction
  seurat$blacklist_fraction <- FractionCountsInRegion(
    object = seurat,
    assay = "peaks",
    regions = blacklist_hg38
  )

  # Grouping labels used by the (optional) QC plots below
  #DensityScatter(seurat, x = 'nCount_peaks', y = 'TSS.enrichment', log_x = TRUE, quantiles = TRUE)
  seurat$high.tss <- ifelse(seurat$TSS.enrichment > 3, "High", "Low")
  #TSSPlot(seurat, group.by = 'high.tss') + NoLegend()
  seurat$nucleosome_group <- ifelse(
    seurat$nucleosome_signal > 3, "NS > 3", "NS < 3"
  )
  #FragmentHistogram(object = seurat, group.by = 'nucleosome_group')

  # Per-cell QC filter
  seurat <- subset(
    x = seurat,
    subset = nCount_peaks > 100 &
      nCount_peaks < 30000 &
      pct_reads_in_peaks > 10 &
      blacklist_fraction < 0.1 &
      nucleosome_signal < 4 &
      TSS.enrichment > 1
  )

  # Compute LSI on each individual object: the later
  # FindIntegrationAnchors(reduction = "rlsi") call needs it.
  seurat <- RunTFIDF(seurat)
  seurat <- FindTopFeatures(seurat, min.cutoff = 10)
  seurat <- RunSVD(seurat, assay = "peaks")

  # Make sure the Seurat object for each sample has a unique name
  assign(paste0("seurat_", sample_name), seurat)
}
# Tag every per-sample object with its dataset of origin and prefix its cell
# names with the sample name ("<sample>_<barcode>") so that cell names stay
# unique once the objects are merged.
# RenameCells() is used instead of assigning to colnames(): it is the
# documented way to rename cells and passes the new names on to the assays
# (and, for chromatin assays, their fragment objects) as well.
for (sample_name in sample_names) {
  object_name <- paste0("seurat_", sample_name)
  sample_object <- get(object_name)
  sample_object$dataset <- sample_name
  sample_object <- RenameCells(sample_object, add.cell.id = sample_name)
  assign(object_name, sample_object)
}
# Drop the temporary handle to the last object
rm(sample_object)
# Merge the eight per-sample objects into one. Cell names were already
# prefixed with the sample name earlier in the script, so no extra cell IDs
# are added at this point.
remaining_samples <- list(
  seurat_BRCA1_G_a, seurat_BRCA1_G_b, seurat_BRCA2_D,
  seurat_WT_12, seurat_WT_B, seurat_WT_C, seurat_WT_D
)
combined <- merge(x = seurat_BRCA1_E, y = remaining_samples)
# Show a summary of the merged peaks assay
combined[["peaks"]]
# Continue under the name `mySeurat` and release the temporary binding
mySeurat <- combined
rm(combined)
gc()

# Optional QC filter on the merged object (disabled; the same thresholds are
# applied per sample before merging)
#mySeurat1 <- subset(
#x = mySeurat,
#subset = nCount_peaks > 100 &
#nCount_peaks < 30000 &
#pct_reads_in_peaks > 10 &
#blacklist_fraction < 0.1 &
#nucleosome_signal < 4 &
#TSS.enrichment > 1
#)
#mySeurat1

# Normalisation, feature selection and LSI on the merged data
mySeurat <- RunTFIDF(mySeurat)
mySeurat <- FindTopFeatures(mySeurat, min.cutoff = "q0")
mySeurat <- RunSVD(mySeurat, assay = "peaks")
# Correlation of each of the first 50 LSI components with sequencing depth
DepthCor(mySeurat, reduction = "lsi", n = 50)
DefaultAssay(mySeurat) <- "peaks"
# UMAP on LSI components 2 to 37 (the first component is left out)
mySeurat <- RunUMAP(object = mySeurat, reduction = "lsi", dims = 2:37)
DimPlot(mySeurat, group.by = "dataset", pt.size = 0.1)
gc()
# Find integration anchors across the eight per-sample objects.
# NOTE: reduction = "rlsi" relies on an LSI reduction having been computed on
# every object in the list beforehand (RunTFIDF / FindTopFeatures / RunSVD).
objects_to_integrate <- list(
  seurat_BRCA1_E, seurat_BRCA1_G_a, seurat_BRCA1_G_b, seurat_BRCA2_D,
  seurat_WT_12, seurat_WT_B, seurat_WT_C, seurat_WT_D
)
integration.anchors <- FindIntegrationAnchors(
  object.list = objects_to_integrate,
  anchor.features = rownames(seurat_BRCA1_E),
  reduction = "rlsi",
  dims = 2:37
) # this is where I am getting the error of subscript out of bounds. Trying to solve this and will keep you posted
gc()
I think you need to uncomment those lines:
#seurat <- RunTFIDF(seurat)
#seurat <- FindTopFeatures(seurat, min.cutoff = 10)
#seurat <- RunSVD(seurat ,assay = 'peaks')
so that LSI is computed for each object, which is needed for rlsi in the integration
Hi Tim,
I actually had those lines uncommented when I ran my original code; I only commented them out just now. Do you think it might work to skip running the reductions on each individual object and rely on the rlsi reduction in FindIntegrationAnchors instead?
Sorry for the confusion, but the rest of the code I ran as is.
Ok, I don't see any issues with the code then. This might be related to a bug in Seurat, the error message is similar to that reported in this issue: https://github.com/satijalab/seurat/issues/8561
I'd suggest opening an issue in Seurat with a reproducible example
Ok, thank you for taking the time to look this over. Will open the issue with seurat and see if that helps.
I had a related error and was able to solve it by removing the Motif object (i.e. setting `seurObj[["ChrAssay"]]@motifs = NULL`). Maybe that would help? Afterwards, you can add the motif object back to the Seurat object.
Thanks @OneHitKO that's good to know!
@sropri92 did you have motifs stored in your object? If so you could try removing and see if that could be a workaround until the issue in Seurat is fixed
@timoast, since we're on the topic of the `Motif` object, would you be able to quickly check issue #1657? I'm not sure whether the way I added/created it led to the downstream subsetting problems in the first place. Thanks for your help (and for developing a great package)!
Going to close this now as it should be an issue with Seurat rather than Signac
I am running FindIntegrationAnchors to integrate my 8 scATAC datasets according to the tutorial on this site:
https://stuartlab.org/signac/articles/integrate_atac
However, when I run the following commands, I get this error.
I have seen others report this issue, but I am not sure what the solution is. I have tried changing the reduction used for integration to PCA, rpca and even cca, and I still get the same error. I have checked that the rownames of all my Seurat objects are the same, since I merged the peak regions so that every object in my object.list has identical regions. Any help would be appreciated.