stuart-lab / signac

R toolkit for the analysis of single-cell chromatin data
https://stuartlab.org/signac/
Other
331 stars 88 forks source link

Issues with LinkPeaks #1851

Open kryptonitecircuit2002 opened 4 days ago

kryptonitecircuit2002 commented 4 days ago

I am analyzing a 10X single cell multiome data carried out in 24 hf zebrafish embryos after running standard cellranger arc pipeline for generating counts file and atac fragments

This is the code I used:

library(Seurat)
library(Signac)
library(dplyr)
library(ggplot2)
library(GenomicRanges)
library(future)
library(patchwork)
library(hdf5r)
library(readr)
library(pheatmap)
library(ggrepel)
library(LSD)
library(MASS)
library(ensembldb)

inputdata.10x <- Read10X_h5("D:/Transit/Single_Cell+ATAC/Control/ATAC/Data+Analysis/Analysis_Control/filtered_feature_bc_matrix_control.h5")
fragpath <- "D:/Transit/Single_Cell+ATAC/Control/ATAC/Data+Analysis/Analysis_Control/atac_fragments.tsv.gz"

rna_counts <- inputdata.10x$`Gene Expression`
atac_counts <- inputdata.10x$Peaks

gref.path = "D:/Transit/Single_Cell+ATAC/Control/ATAC/Data+Analysis/Analysis_Control/Danio_rerio.GRCz11.105.gtf"
gtf_zf <- rtracklayer::import(gref.path)
gene.coords.zf <- gtf_zf
gene.coords.zf <- gene.coords.zf[! is.na(gene.coords.zf$gene_name),]
gene.coords.zf <- keepStandardChromosomes(gene.coords.zf, pruning.mode = 'coarse')
genome(gene.coords.zf) <- 'GRCz11'
gene.coords.zf$tx_id <- gene.coords.zf$gene_id
gene.coords.zf$transcript_id <- gene.coords.zf$gene_id

Control.data <- CreateSeuratObject(counts = rna_counts)
chrom_assay <- CreateChromatinAssay(
  counts = atac_counts,
  sep = c(":", "-"),
  genome = 'GRCz11',
  fragments = fragpath,
  annotation = gene.coords.zf
)
Control.data[["ATAC"]]<- chrom_assay

DefaultAssay(Control.data) <- "ATAC"
Control.data <- NucleosomeSignal(Control.data)
Control.data <- TSSEnrichment(Control.data)
DensityScatter(Control.data, x = 'nCount_ATAC', y = 'TSS.enrichment', log_x = TRUE, quantiles = TRUE)
VlnPlot(
  object = Control.data,
  features = c("nCount_RNA", "nCount_ATAC", "TSS.enrichment", "nucleosome_signal"),
  ncol = 4,
  pt.size = 0
)

Control.data <- subset(
  x = Control.data,
  subset = nCount_ATAC < 150000 &
    nCount_RNA < 20000 &
    nucleosome_signal < 1.5 &
    TSS.enrichment > 1
)

DefaultAssay(Control.data) <- "RNA"
Control.data <- SCTransform(Control.data)
Control.data <- RunPCA(Control.data)
Control.data <- RunUMAP(Control.data, dims = 1:50, reduction.name = 'umap.rna', reduction.key = 'rnaUMAP_')

DefaultAssay(Control.data) <- "ATAC"
Control.data <- RunTFIDF(Control.data)
Control.data <- FindTopFeatures(Control.data, min.cutoff = 'q0')
Control.data <- RunSVD(Control.data)
Control.data <- RunUMAP(Control.data, reduction = 'lsi', dims = 2:50, reduction.name = "umap.atac", reduction.key = "atacUMAP_")

Control.data <- FindMultiModalNeighbors(Control.data, reduction.list = list("pca", "lsi"), dims.list = list(1:50, 2:50))
Control.data <- RunUMAP(Control.data, nn.name = "weighted.nn", reduction.name = "wnn.umap", reduction.key = "wnnUMAP_")
Control.data <- FindClusters(Control.data, graph.name = "wsnn", algorithm = 3, verbose = FALSE)

p1 <- DimPlot(Control.data, reduction = "umap.rna", label = TRUE, label.size = 2.5, repel = TRUE) + ggtitle("RNA")
p2 <- DimPlot(Control.data, reduction = "umap.atac", label = TRUE, label.size = 2.5, repel = TRUE) + ggtitle("ATAC")
p3 <- DimPlot(Control.data, reduction = "wnn.umap", label = TRUE, label.size = 2.5, repel = TRUE) + ggtitle("WNN")
p1 + p2 + p3 & NoLegend() & theme(plot.title = element_text(hjust = 0.5))

Control.data <- RegionStats(Control.data, genome = BSgenome.Drerio.UCSC.danRer11)

Control.data <- LinkPeaks(
  object = Control.data,
  peak.assay = "ATAC",
  expression.assay = "SCT",
  genes.use = c("mitfa")
)
CoveragePlot(Control.data, 
             region = "sox10", 
             features = "sox10", 
             assay = 'ATAC', 
             expression.assay = 'SCT', 
             peaks = TRUE)

However I am facing issues when I am trying to run LinkPeaks for my data and facing this error:

> Control.data <- LinkPeaks(
+   object = Control.data,
+   peak.assay = "ATAC",
+   expression.assay = "SCT",
+   genes.use = c("mitfa")
+ )
Testing 1 genes and 136545 peaks
  |                                                  | 0 % ~calculating  Error in density.default(x = query.feature[[featmatch]], kernel = "gaussian",  : 
  argument 'x' must be numeric
In addition: Warning messages:
1: In .merge_two_Seqinfo_objects(x, y) :
  Each of the 2 combined objects has sequence levels not in the other:
  - in 'x': KN147632.2, KN147636.1, KN147637.2, KN147651.2, KN149685.1, KN149686.1, KN149688.2, KN149689.2, KN149690.1, KN149696.2, KN149702.1, KN149707.2, KN149710.1, KN149711.1, KN149713.1, KN149725.1, KN149729.1, KN149732.1, KN149735.1, KN149739.1, KN149749.1, KN149764.1, KN149778.1, KN149779.1, KN149782.1, KN149790.1, KN149793.1, KN149797.1, KN149800.1, KN149813.1, KN149816.1, KN149818.1, KN149840.1, KN149847.1, KN149855.1, KN149857.1, KN149859.1, KN149874.1, KN149880.1, KN149883.1, KN149884.1, KN149895.1, KN149909.1, KN149912.1, KN149932.1, KN149943.1, KN149946.1, KN149948.1, KN149955.1, KN149959.1, KN149968.1, KN149978.1, KN149984.1, KN149992.1, KN149995.1, KN149997.1, KN149998.1, KN150000.1, KN150008.1, KN150015.1, KN150019.2, KN150030.1, KN150032.1, KN150041.2, KN150062.1, KN150067.1, KN150086.1, KN150098.1, KN150102.1, KN150104.1, KN150115.1, KN150120.1, KN150125.1, KN150131.1, KN150137.1, KN150141.1, KN15015 [... truncated]
2: In MatchRegionStats(meta.feature = meta.use, query.feature = pk.use[x,  :
  Requested more features than present in supplied data.
            Returning 0 features

Can anyone help me how to fix this?

Here is my sessioninfo:

 R version 4.4.1 (2024-06-14 ucrt)
Platform: x86_64-w64-mingw32/x64
Running under: Windows 11 x64 (build 22631)

Matrix products: default

locale:
[1] LC_COLLATE=English_India.utf8  LC_CTYPE=English_India.utf8    LC_MONETARY=English_India.utf8
[4] LC_NUMERIC=C                   LC_TIME=English_India.utf8    

time zone: Asia/Calcutta
tzcode source: internal

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] BSgenome.Drerio.UCSC.danRer11_1.4.2 BSgenome_1.72.0                     rtracklayer_1.64.0                 
 [4] BiocIO_1.14.0                       Biostrings_2.72.1                   XVector_0.44.0                     
 [7] ensembldb_2.28.1                    AnnotationFilter_1.28.0             GenomicFeatures_1.56.0             
[10] AnnotationDbi_1.66.0                Biobase_2.64.0                      MASS_7.3-61                        
[13] LSD_4.1-0                           ggrepel_0.9.6                       pheatmap_1.0.12                    
[16] readr_2.1.5                         hdf5r_1.3.11                        patchwork_1.3.0                    
[19] future_1.34.0                       GenomicRanges_1.56.2                GenomeInfoDb_1.40.1                
[22] IRanges_2.38.1                      S4Vectors_0.42.1                    BiocGenerics_0.50.0                
[25] ggplot2_3.5.1                       dplyr_1.1.4                         Signac_1.14.0                      
[28] Seurat_5.1.0                        SeuratObject_5.0.2                  sp_2.1-4                           

loaded via a namespace (and not attached):
  [1] RcppAnnoy_0.0.22            splines_4.4.1               later_1.3.2                 bitops_1.0-9               
  [5] tibble_3.2.1                polyclip_1.10-7             XML_3.99-0.17               fastDummies_1.7.4          
  [9] lifecycle_1.0.4             globals_0.16.3              lattice_0.22-6              magrittr_2.0.3             
 [13] plotly_4.10.4               yaml_2.3.10                 httpuv_1.6.15               sctransform_0.4.1          
 [17] spam_2.11-0                 spatstat.sparse_3.1-0       reticulate_1.39.0           cowplot_1.1.3              
 [21] pbapply_1.7-2               DBI_1.2.3                   RColorBrewer_1.1-3          abind_1.4-8                
 [25] zlibbioc_1.50.0             Rtsne_0.17                  purrr_1.0.2                 RCurl_1.98-1.16            
 [29] GenomeInfoDbData_1.2.12     irlba_2.3.5.1               listenv_0.9.1               spatstat.utils_3.1-1       
 [33] goftest_1.2-3               RSpectra_0.16-2             spatstat.random_3.3-2       fitdistrplus_1.2-1         
 [37] parallelly_1.39.0           DelayedArray_0.30.1         leiden_0.4.3.1              codetools_0.2-20           
 [41] RcppRoll_0.3.1              tidyselect_1.2.1            UCSC.utils_1.0.0            farver_2.1.2               
 [45] matrixStats_1.4.1           spatstat.explore_3.3-3      GenomicAlignments_1.40.0    jsonlite_1.8.9             
 [49] progressr_0.15.1            ggridges_0.5.6              survival_3.7-0              tools_4.4.1                
 [53] ica_1.0-3                   Rcpp_1.0.13                 glue_1.7.0                  SparseArray_1.4.8          
 [57] gridExtra_2.3               MatrixGenerics_1.16.0       withr_3.0.2                 fastmap_1.2.0              
 [61] fansi_1.0.6                 digest_0.6.37               R6_2.5.1                    mime_0.12                  
 [65] colorspace_2.1-1            scattermore_1.2             tensor_1.5                  spatstat.data_3.1-4        
 [69] RSQLite_2.3.8               utf8_1.2.4                  tidyr_1.3.1                 generics_0.1.3             
 [73] data.table_1.16.2           S4Arrays_1.4.1              httr_1.4.7                  htmlwidgets_1.6.4          
 [77] uwot_0.2.2                  pkgconfig_2.0.3             gtable_0.3.6                blob_1.2.4                 
 [81] lmtest_0.9-40               htmltools_0.5.8.1           dotCall64_1.2               ProtGenerics_1.36.0        
 [85] scales_1.3.0                png_0.1-8                   spatstat.univar_3.1-1       rstudioapi_0.17.1          
 [89] tzdb_0.4.0                  reshape2_1.4.4              rjson_0.2.23                nlme_3.1-166               
 [93] curl_6.0.1                  zoo_1.8-12                  cachem_1.1.0                stringr_1.5.1              
 [97] KernSmooth_2.23-24          vipor_0.4.7                 parallel_4.4.1              miniUI_0.1.1.1             
[101] ggrastr_1.0.2               restfulr_0.0.15             pillar_1.9.0                grid_4.4.1                 
[105] vctrs_0.6.5                 RANN_2.6.2                  promises_1.3.0              xtable_1.8-4               
[109] cluster_2.1.6               beeswarm_0.4.0              cli_3.6.3                   compiler_4.4.1             
[113] Rsamtools_2.20.0            rlang_1.1.4                 crayon_1.5.3                future.apply_1.11.3        
[117] labeling_0.4.3              ggbeeswarm_0.7.2            plyr_1.8.9                  stringi_1.8.4              
[121] viridisLite_0.4.2           deldir_2.0-4                BiocParallel_1.38.0         munsell_0.5.1              
[125] lazyeval_0.2.2              spatstat.geom_3.3-3         Matrix_1.7-0                RcppHNSW_0.6.0             
[129] hms_1.1.3                   bit64_4.5.2                 KEGGREST_1.44.1             shiny_1.9.1                
[133] SummarizedExperiment_1.34.0 ROCR_1.0-11                 igraph_2.0.3                memoise_2.0.1              
[137] fastmatch_1.1-4             bit_4.5.0                  
Warning messages:
1: In grid.Call(C_convert, x, as.integer(whatfrom), as.integer(whatto),  :
  Viewport has zero dimension(s)
2: In grid.Call(C_convert, x, as.integer(whatfrom), as.integer(whatto),  :
  Viewport has zero dimension(s)
3: In grid.Call(C_convert, x, as.integer(whatfrom), as.integer(whatto),  :
  Viewport has zero dimension(s)
4: In grid.Call(C_convert, x, as.integer(whatfrom), as.integer(whatto),  :
  Viewport has zero dimension(s)
5: In grid.Call(C_convert, x, as.integer(whatfrom), as.integer(whatto),  :
  Viewport has zero dimension(s)
6: In grid.Call(C_convert, x, as.integer(whatfrom), as.integer(whatto),  :
  Viewport has zero dimension(s)
nh-codem commented 2 days ago

Have you encountered an issue where peaks and their linked genes are located on different chromosomes? @kryptonitecircuit2002