Closed davhg96 closed 9 months ago
Hi, please show the full code you're running, the error message, and the output of sessionInfo()
Hi, here is my code. Starts with creating the objects for each sample extracting human cells and combining them. The feature name change has to be done to be able to merge the objects (from GRCh38- to GRCh38_ ) `library(Seurat) library(Signac) library(tidyverse) library(cowplot) library(rtracklayer)
counts_fib <- Read10X_h5(filename = "./3_Data/ATAC_Fib5K/filtered_peak_bc_matrix.h5") metadata_fib <- read.csv(file = "./3_Data/ATAC_Fib5K/singlecell.csv", header = TRUE, row.names = 1 )
chrom_fib <- CreateChromatinAssay(counts = counts_fib, sep = c(":", "-"), fragments = './3_Data/ATAC_Fib5K/fragments.tsv.gz',
min.cells = 10,
min.features = 200)
fibrin <- CreateSeuratObject(counts = chrom_fib, assay = "ATAC", project = "fibrin_ATAC",meta.data = metadata_fib) fibrin$fibrinset <- 'fibrin' DefaultAssay(fibrin) <- "ATAC"
fibrin[["percent.human"]] <- PercentageFeatureSet(fibrin, pattern = "^GRCh38-") fibrin <- subset(fibrin, subset = percent.human > 90)
fibrin namesfib <- sub("^GRCh38-","GRCh38", rownames(fibrin)) namesfib <- sub("^mm10-","mm10", names_fib)
rownames(fibrin@assays$ATAC@counts) <- names_fib rownames(fibrin@assays$ATAC@data) <- names_fib rownames(fibrin@assays$ATAC@meta.features) <- names_fib
x <- c("GRCh38_KI270721.1","GRCh38_KI270727.1") fibrin <- subset(fibrin,features=setdiff(rownames(fibrin),x))
granges(fibrin)
counts_oss <- Read10X_h5(filename = "./3_Data/ATAC_Oss8K/filtered_peak_bc_matrix.h5") metadata_oss <- read.csv(file = "./3_Data/ATAC_Oss8K/singlecell.csv", header = TRUE, row.names = 1 )
chrom_oss <- CreateChromatinAssay(counts = counts_oss, sep = c(":", "-"), fragments = './3_Data/ATAC_Oss8K/fragments.tsv.gz',
min.cells = 10,
min.features = 200)
oss <- CreateSeuratObject(counts = chrom_oss, assay = "ATAC", project = "Oss_ATAC", meta.data = metadata_oss) oss$dataset <- 'ossigel' DefaultAssay(oss) <- "ATAC"
oss[["percent.human"]] <- PercentageFeatureSet(oss, pattern = "^GRCh38-") oss <- subset(oss, subset = percent.human > 90)
oss
namesoss <- sub("^GRCh38-","GRCh38", rownames(oss)) namesoss <- sub("^mm10-","mm10", names_oss)
rownames(oss@assays$ATAC@counts) <- names_oss rownames(oss@assays$ATAC@data) <- names_oss rownames(oss@assays$ATAC@meta.features) <- names_oss
x <- c("GRCh38_GL000009.2") oss <- subset(oss,features=setdiff(rownames(oss),x)) oss <- oss[!grepl("GRCh38_GL000009.2", rownames(oss)),]
granges(oss) granges(fibrin)
data <- merge(x= fibrin, y=oss,add.cell.ids = c("fibrin","oss")) granges(data)
rm(chrom_fib, chrom_oss, counts_fib, counts_oss, metadata_fib, metadata_oss)
rm(names_fib, names_oss, x)
new.meta.feat.names <- sub("^GRCh38-","GRCh38", rownames(data@assays$ATAC@meta.features)) new.meta.feat.names <- sub("^mm10-" , "mm10", new.meta.feat.names) rownames(data@assays$ATAC@meta.features) <- new.meta.feat.names
new.counts.names <- sub("^GRCh38-","GRCh38", rownames(data@assays$ATAC@counts)) new.counts.names <- sub("^mm10-", "mm10", new.counts.names) rownames(data@assays$ATAC@counts) <- new.counts.names
head(rownames(data@assays$ATAC@meta.features)) head(rownames(data@assays$ATAC@counts)) head(rownames(data@assays$ATAC@data))
annotations <-rtracklayer::import("./3_Data/geneRef/genes.gtf.gz") granges(annotations) genome(annotations) <- "GRCh38/mm10" annotations$gene_biotype <- annotations$gene_type granges(annotations)
data data[["ATAC"]]
head(rownames(data@assays$ATAC@meta.features)) head(rownames(data@assays$ATAC@counts)) head(rownames(data@assays$ATAC@data)) head(Fragments(data)[[1]])
Annotation(data) <- annotations head(Annotation(data)) tail(Annotation(data))
data <- NucleosomeSignal(object = data)
data <- TSSEnrichment(object = data, fast = FALSE)
data$pct_reads_in_peaks <- data$peak_region_fragments / data$passed_filters * 100 data$blacklist_ratio <- data$blacklist_region_fragments / data$peak_region_fragments
DensityScatter(data, x = 'nCount_ATAC', y = 'TSS.enrichment', log_x = TRUE, quantiles = TRUE)
data$high.tss <- ifelse(data$TSS.enrichment > 3, 'High', 'Low')
TSSPlot(data, group.by = 'high.tss') + NoLegend()
data$nucleosome_group <- ifelse(data$nucleosome_signal > 4, 'NS > 4', 'NS < 4')
VlnPlot( object = data, features = c('nCount_ATAC', 'TSS.enrichment', 'blacklist_ratio', 'nucleosome_signal', 'pct_reads_in_peaks'), pt.size = 0.1, ncol = 5 )
data_qc <- subset( x = data, subset = nCount_ATAC > 4000 & nCount_ATAC < 45000 & pct_reads_in_peaks > 25 & blacklist_ratio < 0.05 & nucleosome_signal < 4 & TSS.enrichment > 3 )
data data_qc data[["ATAC"]] data_qc[["ATAC"]]
VlnPlot( object = data_qc, features = c('nCount_ATAC', 'TSS.enrichment', 'blacklist_ratio', 'nucleosome_signal', 'pct_reads_in_peaks'), pt.size = 0.1, ncol = 5 )
head(rownames(data@assays$ATAC@meta.features)) head(rownames(data@assays$ATAC@counts)) head(rownames(data@assays$ATAC@data)) head(data@assays$ATAC@ranges@seqinfo@seqnames)
############
data_qc <- RunTFIDF(data_qc) data_qc <- FindTopFeatures(object=data_qc, min.cutoff = 'q0') data_qc <- RunSVD(data_qc)
DepthCor(data_qc)`
After data_qc <- FindTopFeatures(object=data_qc, min.cutoff = 'q0')
I get:
> data_qc <- FindTopFeatures(object=data_qc, min.cutoff = 'q0') Error: None of the features provided are in this Assay object In addition: Warning message: Feature names cannot have underscores '_', replacing with dashes '-'TRUE
`R version 4.3.2 (2023-10-31) Platform: aarch64-apple-darwin20 (64-bit) Running under: macOS Sonoma 14.2.1
locale: [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: Europe/Stockholm tzcode source: internal
attached base packages: [1] stats4 stats graphics grDevices utils datasets methods base
other attached packages:
[1] rtracklayer_1.62.0 GenomicRanges_1.54.1 GenomeInfoDb_1.38.5 IRanges_2.36.0
[5] S4Vectors_0.40.2 BiocGenerics_0.48.1 cowplot_1.1.2 lubridate_1.9.3
[9] forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4 purrr_1.0.2
[13] readr_2.1.4 tidyr_1.3.0 tibble_3.2.1 ggplot2_3.4.4
[17] tidyverse_2.0.0 Signac_1.11.0 SeuratObject_4.1.4 Seurat_4.3.0.1
loaded via a namespace (and not attached):
[1] RColorBrewer_1.1-3 rstudioapi_0.15.0 jsonlite_1.8.8
[4] magrittr_2.0.3 spatstat.utils_3.0-4 farver_2.1.1
[7] BiocIO_1.12.0 zlibbioc_1.48.0 vctrs_0.6.5
[10] ROCR_1.0-11 spatstat.explore_3.2-5 Rsamtools_2.18.0
[13] RCurl_1.98-1.13 RcppRoll_0.3.0 htmltools_0.5.7
[16] S4Arrays_1.2.0 SparseArray_1.2.3 sctransform_0.4.1
[19] parallelly_1.36.0 KernSmooth_2.23-22 htmlwidgets_1.6.4
[22] ica_1.0-3 plyr_1.8.9 plotly_4.10.3
[25] zoo_1.8-12 GenomicAlignments_1.38.0 igraph_1.6.0
[28] mime_0.12 lifecycle_1.0.4 pkgconfig_2.0.3
[31] Matrix_1.6-4 R6_2.5.1 fastmap_1.1.1
[34] GenomeInfoDbData_1.2.11 MatrixGenerics_1.14.0 fitdistrplus_1.1-11
[37] future_1.33.1 shiny_1.8.0 digest_0.6.33
[40] colorspace_2.1-0 patchwork_1.1.3 tensor_1.5
[43] irlba_2.3.5.1 labeling_0.4.3 progressr_0.14.0
[46] timechange_0.2.0 fansi_1.0.6 spatstat.sparse_3.0-3
[49] polyclip_1.10-6 httr_1.4.7 abind_1.4-5
[52] compiler_4.3.2 bit64_4.0.5 withr_2.5.2
[55] BiocParallel_1.36.0 MASS_7.3-60 DelayedArray_0.28.0
[58] rjson_0.2.21 tools_4.3.2 lmtest_0.9-40
[61] httpuv_1.6.13 future.apply_1.11.1 goftest_1.2-3
[64] glue_1.6.2 restfulr_0.0.15 nlme_3.1-164
[67] promises_1.2.1 grid_4.3.2 Rtsne_0.17
[70] cluster_2.1.6 reshape2_1.4.4 generics_0.1.3
[73] hdf5r_1.3.8 gtable_0.3.4 spatstat.data_3.0-3
[76] tzdb_0.4.0 hms_1.1.3 data.table_1.14.10
[79] sp_2.1-2 utf8_1.2.4 XVector_0.42.0
[82] spatstat.geom_3.2-7 RcppAnnoy_0.0.21 ggrepel_0.9.4
[85] RANN_2.6.1 pillar_1.9.0 later_1.3.2
[88] splines_4.3.2 lattice_0.22-5 bit_4.0.5
[91] deldir_2.0-2 survival_3.5-7 tidyselect_1.2.0
[94] Biostrings_2.70.1 miniUI_0.1.1.1 pbapply_1.7-2
[97] gridExtra_2.3 SummarizedExperiment_1.32.0 scattermore_1.2
[100] Biobase_2.62.0 matrixStats_1.2.0 stringi_1.8.3
[103] lazyeval_0.2.2 yaml_2.3.8 codetools_0.2-19
[106] cli_3.6.2 uwot_0.1.16 xtable_1.8-4
[109] reticulate_1.34.0 munsell_0.5.0 Rcpp_1.0.11
[112] spatstat.random_3.2-2 globals_0.16.2 png_0.1-8
[115] XML_3.99-0.16 parallel_4.3.2 ellipsis_0.3.2
[118] bitops_1.0-7 listenv_0.9.0 viridisLite_0.4.2
[121] scales_1.3.0 ggridges_0.5.5 leiden_0.4.3.1
[124] crayon_1.5.2 rlang_1.1.2 fastmatch_1.1-4 `
You can't modify feature names in the object by changing the matrix rownames the way you are doing, ie:
rownames(fibrin@assays$ATAC@counts) <- names_fib
rownames(fibrin@assays$ATAC@data) <- names_fib
rownames(fibrin@assays$ATAC@meta.features) <- names_fib
this will cause issues downstream in the analysis since the feature names do not match in different parts of the object.
If you need to alter the feature names you should do this in the original count matrix before creating the Seurat object.
Hi, I implemented that modification, extracted the human cells from the Seurat object and subsetted the counts based on those cell IDs. I'm still running into similar issues since I can't really modify the peaks inside the object. Any idea?
`> head(rownames(data@assays$ATAC@meta.features)) [1] "chr1-180981-181820" "chr1-183881-184783" "chr1-191047-191922" "chr1-267574-268593" "chr1-502199-503098" [6] "chr1-585748-586646"
head(rownames(data@assays$ATAC@counts)) [1] "chr1-180981-181820" "chr1-183881-184783" "chr1-191047-191922" "chr1-267574-268593" "chr1-502199-503098" [6] "chr1-585748-586646" head(rownames(data@assays$ATAC@data)) [1] "chr1-180981-181820" "chr1-183881-184783" "chr1-191047-191922" "chr1-267574-268593" "chr1-502199-503098" [6] "chr1-585748-586646" head(Fragments(data)[[1]]) #Still has the underscore!!! chrom start end barcode readCount 1 GRCh38_chr1 10084 10271 ATATGCAAGCCGGTAA-1 1 2 GRCh38_chr1 10084 10321 GGTCAACCATCCCAAT-1 1 3 GRCh38_chr1 10126 10332 GCGATCCGTCACACCA-1 1 4 GRCh38_chr1 10132 10285 GTCTTAGTCACAGCAG-1 1 5 GRCh38_chr1 10151 10327 CCCACATAGAAACCTA-1 1 6 GRCh38_chr1 10163 10340 GTTTGGGAGCAGGTAT-1 1`
I think you will have to modify your fragment file so that the chromosome names don't have an underscore, since this will be changed to something else in the Seurat object. In the end your chromosome names will have to match in your Seurat object and in the fragment file, otherwise you're not going to be able to find fragments based on their genomic coordinates correctly.
Yeah thats what Im thinking but I don't see a straight way of doing it since the genome prefixes are there for the barnyard difference in cellranger. Is there any way of modifying the data in the fragment class in R?
Best
Is there any way of modifying the data in the fragment class in R?
No, the fragment class just points to the file on disk, all the data is still stored there. In the future we may try to add some functionality to provide on-the-fly chromosome renaming, but for now you would need to change the chromosome names in the file. I'd suggest using sed
to replace characters in the file.
I could work around this by selecting my human barcodes, subsetting the count matrix, and cleaning the feature names there and dropping the few mouse reads on the human cells. From there I could do the new chromatin assay from that count matrix and the seurat object without much problem. Just for downstream analysis, peak calling TSS enrichment and so on need to add the genome prefix to the annotation as it is in the fragments file
Hi, I am analyzing a dataset consisting of 2 samples, each sample is mapped to the human-mouse genome. I can run the pipeline correctly up until finding the top features, where I get an error saying the features are not present in the Assay. but when manually checking the feature names in
@data
,@counts
, and@metadata
they are all the same. Am I missing some other spot?Best