rezakj / iCellR

Single (i) Cell R package (iCellR) is an interactive R package to work with high-throughput single cell sequencing technologies (i.e scRNA-seq, scVDJ-seq, scATAC-seq, CITE-Seq and Spatial Transcriptomics (ST)).
120 stars 19 forks source link

VDJ analysis: can I use filtered.contig? Error when I run clono.plot #25

Closed france-hub closed 3 years ago

france-hub commented 3 years ago

Hi rezakj, thank you for your useful package!

I have two questions: 1) Can I run prep.vdj on filtered_contig_annotations.csv or it's better to have the all_contig_annotations.csv? 2) I have an issue with the clono.plot function. I'll summarize the code so that maybe you can help me to find the issue:

`#Convert Seurat to iCellR object my.data <- as.data.frame(as.matrix(sobj@assays$RNA@counts)) my.obj <- make.obj(my.data)

Add Seurat's data

myUMAP <- as.data.frame(sobj@reductions$umap@cell.embeddings) myPCA <- as.data.frame(sobj@reductions$pca@cell.embeddings) Clust <- as.data.frame(as.matrix(Idents(sobj))) colnames(Clust) <- "clusters" my.obj@main.data <- as.data.frame(sobj[["RNA"]]@data) my.obj@umap.data <- myUMAP my.obj@pca.data <- myPCA my.obj@best.clust <- Clust my.obj@metadata <- sobj@meta.data

Check the new object

slotNames(my.obj) my.obj

Read in contig annotated files

contig_batch1 <- paste(PrimaryDirectory, "data_VDJ/20018-01", sep ='/') contig_batch2 <- paste(PrimaryDirectory, "data_VDJ/20018-04", sep ='/') my.vdj.1 <- read.csv(paste(contig_batch1,"06/S06.csv", sep = "/")) my.vdj.2 <- read.csv(paste(contig_batch1,"07/S07.csv", sep = "/")) my.vdj.3 <- read.csv(paste(contig_batch2, "01/filtered_contig_annotations.csv", sep = "/")) my.vdj.4 <- read.csv(paste(contig_batch2, "02/filtered_contig_annotations.csv", sep = "/")) my.vdj.5 <- read.csv(paste(contig_batch2, "03/filtered_contig_annotations.csv", sep = "/")) my.vdj.6 <- read.csv(paste(contig_batch2, "04/filtered_contig_annotations.csv", sep = "/")) my.vdj.7 <- read.csv(paste(contig_batch2, "05/filtered_contig_annotations.csv", sep = "/")) my.vdj.8 <- read.csv(paste(contig_batch2, "06/filtered_contig_annotations.csv", sep = "/"))

Run prep.vdj

my.vdj.1 <- prep.vdj(vdj.data = my.vdj.1, cond.name = "CR_bas_229") my.vdj.2 <- prep.vdj(vdj.data = my.vdj.2, cond.name = "CR_post_229") my.vdj.3 <- prep.vdj(vdj.data = my.vdj.3, cond.name = "NR_bas_V09") my.vdj.4 <- prep.vdj(vdj.data = my.vdj.4, cond.name = "NR_post_V09") my.vdj.5 <- prep.vdj(vdj.data = my.vdj.5, cond.name = "NR_bas_219") my.vdj.6 <- prep.vdj(vdj.data = my.vdj.6, cond.name = "NR_post_219") my.vdj.7 <- prep.vdj(vdj.data = my.vdj.7, cond.name = "CR_bas_264") my.vdj.8 <- prep.vdj(vdj.data = my.vdj.8, cond.name = "CR_post_264")

Create vdj dataframe and add it to iCellR object my.vdj.data <- rbind(my.vdj.1, my.vdj.2, my.vdj.3, my.vdj.4, my.vdj.5, my.vdj.6, my.vdj.7, my.vdj.8) my.obj <- add.vdj(my.obj, vdj.data = my.vdj.data)

head(my.vdj.data) raw_clonotype_id barcode is_cell contig_id high_confidence length chain v_gene d_gene j_gene c_gene full_length productive 1 clonotype1 CR_bas_229_TGGTTCCGTTGCTCCT.1 True TGGTTCCGTTGCTCCT-1_contig_1 True 530 TRB TRBV18 None TRBJ1-2 TRBC1 True True 2 clonotype1 CR_bas_229_GATCAGTGTTAGATGA.1 True GATCAGTGTTAGATGA-1_contig_1 True 489 TRA TRAV38-1 None TRAJ38 TRAC True True 3 clonotype1 CR_bas_229_GTTACAGTCCACGCAG.1 True GTTACAGTCCACGCAG-1_contig_2 True 530 TRB TRBV18 None TRBJ1-2 TRBC1 True True 4 clonotype1 CR_bas_229_CACCAGGAGTTAGCGG.1 True CACCAGGAGTTAGCGG-1_contig_2 True 573 TRA TRAV38-1 None TRAJ38 TRAC True True 5 clonotype1 CR_bas_229_CGATCGGTCGTCTGAA.1 True CGATCGGTCGTCTGAA-1_contig_1 True 479 TRA TRAV38-1 None TRAJ38 TRAC True True 6 clonotype1 CR_bas_229_TAAGTGCAGATAGTCA.1 True TAAGTGCAGATAGTCA-1_contig_2 True 514 TRB TRBV18 None TRBJ1-2 TRBC1 True True cdr3 cdr3_nt reads umis raw_consensus_id my.raw_clonotype_id clonotype.Freq proportion total.colonotype 1 CASGAGPRGGYTF TGTGCCAGCGGCGCCGGGCCCCGAGGGGGCTACACCTTC 8090 10 clonotype1_consensus_1 clonotype1 153 0.05103402 2058 2 CAFMKHVISNNRKLIW TGTGCTTTCATGAAGCATGTTATCTCCAACAACCGTAAGCTGATTTGG 10824 6 clonotype1_consensus_2 clonotype1 153 0.05103402 2058 3 CASGAGPRGGYTF TGTGCCAGCGGCGCCGGGCCCCGAGGGGGCTACACCTTC 9800 12 clonotype1_consensus_1 clonotype1 153 0.05103402 2058 4 CAFMKHVISNNRKLIW TGTGCTTTCATGAAGCATGTTATCTCCAACAACCGTAAGCTGATTTGG 4944 6 clonotype1_consensus_2 clonotype1 153 0.05103402 2058 5 CAFMKHVISNNRKLIW TGTGCTTTCATGAAGCATGTTATCTCCAACAACCGTAAGCTGATTTGG 770 5 clonotype1_consensus_2 clonotype1 153 0.05103402 2058 6 CASGAGPRGGYTF TGTGCCAGCGGCGCCGGGCCCCGAGGGGGCTACACCTTC 8696 10 clonotype1_consensus_1 clonotype1 153 0.05103402 2058

clonotype.frequency <- as.data.frame(sort(table(as.character(as.matrix((my.obj@vdj.data)[1]))),decreasing = TRUE))

head(clonotype.frequency) Var1 Freq 1 clonotype1 1577 2 clonotype2 870 3 clonotype3 611 4 clonotype4 516 5 clonotype7 370 6 clonotype6 361

clono.plot(my.obj, plot.data.type = "umap", clonotype.column = 1, barcode.column = 2, clono = "clonotype1", conds.to.plot = NULL, cell.transparency = 1, clust.dim = 2, interactive = F)

Error in $<-.data.frame(*tmp*, "MyCol", value = "red") : replacement has 1 row, data has 0

sessionInfo() R version 4.0.2 (2020-06-22) Platform: x86_64-pc-linux-gnu (64-bit) Running under: Ubuntu 18.04.4 LTS Matrix products: default BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1 LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1 locale: [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 [7] LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C attached base packages: [1] parallel stats4 stats graphics grDevices utils datasets methods base other attached packages: [1] scales_1.1.1 circlize_0.4.10 scater_1.16.2 SingleCellExperiment_1.12.0 SummarizedExperiment_1.20.0 Biobase_2.50.0 [7] GenomicRanges_1.42.0 GenomeInfoDb_1.26.1 IRanges_2.24.0 S4Vectors_0.28.0 BiocGenerics_0.36.0 MatrixGenerics_1.2.0 [13] matrixStats_0.57.0 rstudioapi_0.13 scRepertoire_0.99.16 reshape2_1.4.4 ggalluvial_0.12.2 dplyr_1.0.2 [19] cowplot_1.1.0 Seurat_3.2.0 iCellR_1.5.8 plotly_4.9.2.1 ggplot2_3.3.2 loaded via a namespace (and not attached): [1] reticulate_1.18 tidyselect_1.1.0 htmlwidgets_1.5.2 BiocParallel_1.22.0 grid_4.0.2 Rtsne_0.15 [7] munsell_0.5.0 codetools_0.2-16 ica_1.0-2 future_1.19.1 miniUI_0.1.1.1 withr_2.3.0 [13] colorspace_2.0-0 knitr_1.30 ROCR_1.0-11 ggsignif_0.6.0 tensor_1.5 listenv_0.8.0 [19] GenomeInfoDbData_1.2.4 polyclip_1.10-0 bit64_4.0.5 pheatmap_1.0.12 vctrs_0.3.5 generics_0.1.0 [25] xfun_0.19 R6_2.5.0 doParallel_1.0.16 ggbeeswarm_0.6.0 rsvd_1.0.3 VGAM_1.1-3 [31] hdf5r_1.3.3 bitops_1.0-6 spatstat.utils_1.17-0 reshape_0.8.8 DelayedArray_0.16.0 promises_1.1.1 [37] nnet_7.3-14 beeswarm_0.2.3 gtable_0.3.0 globals_0.13.1 goftest_1.2-2 rlang_0.4.8 [43] scatterplot3d_0.3-41 GlobalOptions_0.1.2 splines_4.0.2 rstatix_0.6.0 lazyeval_0.2.2 broom_0.7.2 [49] checkmate_2.0.0 abind_1.4-5 backports_1.2.0 httpuv_1.5.4 Hmisc_4.4-1 tools_4.0.2 [55] cubature_2.0.4.1 ellipsis_0.3.1 RColorBrewer_1.1-2 ggdendro_0.1.22 ggridges_0.5.2 Rcpp_1.0.5 [61] plyr_1.8.6 base64enc_0.1-3 progress_1.2.2 zlibbioc_1.36.0 purrr_0.3.4 RCurl_1.98-1.2 [67] prettyunits_1.1.1 ggpubr_0.4.0 rpart_4.1-15 deldir_0.1-29 viridis_0.5.1 pbapply_1.4-3 [73] zoo_1.8-8 haven_2.3.1 ggrepel_0.8.2 cluster_2.1.0 colorRamps_2.3 magrittr_2.0.1 [79] data.table_1.13.2 openxlsx_4.2.2 SparseM_1.78 NbClust_3.0 lmtest_0.9-38 RANN_2.6.1 [85] truncdist_1.0-2 fitdistrplus_1.1-1 gsl_2.1-6 hms_0.5.3 patchwork_1.1.0 mime_0.9 [91] xtable_1.8-4 rio_0.5.16 jpeg_0.1-8.1 readxl_1.3.1 shape_1.4.5 gridExtra_2.3 [97] compiler_4.0.2 tibble_3.0.4 KernSmooth_2.23-17 crayon_1.3.4 htmltools_0.5.0 mgcv_1.8-33 [103] later_1.1.0.1 Formula_1.2-4 tidyr_1.1.2 powerTCR_1.8.0 MASS_7.3-53 rappdirs_0.3.1 [109] Matrix_1.2-18 car_3.0-10 permute_0.9-5 evd_2.3-3 igraph_1.2.6 forcats_0.5.0 [115] pkgconfig_2.0.3 foreign_0.8-80 foreach_1.5.1 vipor_0.4.5 stringdist_0.9.6.3 XVector_0.30.0 [121] stringr_1.4.0 digest_0.6.27 sctransform_0.2.1 RcppAnnoy_0.0.17 vegan_2.5-6 spatstat.data_1.4-3 [127] Biostrings_2.56.0 cellranger_1.1.0 leiden_0.3.3 htmlTable_2.1.0 uwot_0.1.8 DelayedMatrixStats_1.10.1 [133] curl_4.3 evmix_2.12 shiny_1.5.0 lifecycle_0.2.0 nlme_3.1-149 jsonlite_1.7.1 [139] BiocNeighbors_1.6.0 carData_3.0-4 viridisLite_0.3.0 pillar_1.4.7 lattice_0.20-41 tcR_2.3.2 [145] fastmap_1.0.1 httr_1.4.2 survival_3.2-7 glue_1.4.2 zip_2.1.1 spatstat_1.64-1 [151] png_0.1-7 iterators_1.0.13 bit_4.0.4 stringi_1.5.3 ggfittext_0.9.0 BiocSingular_1.4.0 [157] latticeExtra_0.6-29 irlba_2.3.3 future.apply_1.6.0 ape_5.4-1

` Any suggestion on how to solve this issue?

Thank you very much

Francesco

rezakj commented 3 years ago

It seems like you are in the right track. For the clonotype data iCellR just plots them on UMAP, tSNE or KNetL map. You can actually do it without iCellR, just get the UMAP dimensions (x and Y) and plot each clonotype separately using ggplot2, this is if you run into issues. Otherwise if the cell barcodes in your main data and vdj data slots match, iCellR should plot them for you. And yes, you can use the filtered vdj data.

france-hub commented 3 years ago

Thank you, it worked!

rezakj commented 3 years ago

Perfect!