markrobinsonuzh / cytofWorkflow

MIT License
14 stars 3 forks source link

Error in prepData(fs, panel, md, features = panel$fcs_colname) : Couldn't match 'flowSet'/FCS filenames with those listed in 'md[[md_cols$file]]'. #37

Closed yueli8 closed 4 months ago

yueli8 commented 4 months ago

Hello,

Thank you so much for developing such nice software.

I was trying to go through the "CyTOF workflow: differential discovery in high-throughput high-dimensional cytometry datasets" vignette, which was updated on 3 May 2024. When I run sce <- prepData(fs, panel, md, features = panel$fcs_colname), the following error shows up:

Error in prepData(fs, panel, md, features = panel$fcs_colname) : 
  Couldn't match 'flowSet'/FCS filenames
with those listed in 'md[[md_cols$file]]'.

Thank you in advance for your great help!

Best,

Yue

 library(cytofWorkflow)
 library(readxl)
 library(ggplot2)
 library(cowplot)
 library(Seurat)
 library(readxl)
 library(HDCytoData)
 setwd("~/mass_cyto_quan-main")

 md <- "PBMC8_metadata.xlsx"
 md <- read_excel(md)

 head(data.frame(md))                                                                                                  
                           file_name sample_id condition patient_id
1    PBMC8_30min_patient1_BCR-XL.fcs    BCRXL1     BCRXL   Patient1
2 PBMC8_30min_patient1_Reference.fcs      Ref1       Ref   Patient1

 fs <- Bodenmiller_BCR_XL_flowSet()
see ?HDCytoData and browseVignettes('HDCytoData') for documentation
loading from cache
Warning message:
In updateObjectFromSlots(object, ..., verbose = verbose) :
  dropping slot(s) 'colnames' from object = 'flowSet'
 panel <- "PBMC8_panel_v3.xlsx"
 panel <- read_excel(panel)

 head(data.frame(panel))                                                                                               
     fcs_colname antigen marker_class
1 CD3(110:114)Dd     CD3         type
2  CD45(In115)Dd    CD45         type
3 pNFkB(Nd142)Dd   pNFkB        state
4  pp38(Nd144)Dd    pp38        state
5   CD4(Nd145)Dd     CD4         type
6  CD20(Sm147)Dd    CD20         type

 all(panel$fcs_colname %in% colnames(fs))
[1] TRUE

 md$condition <- factor(md$condition, levels = c("Ref", "BCRXL"))
 md$sample_id <- factor(md$sample_id, 
+                        levels = md$sample_id[order(md$condition)])
 # construct SingleCellExperiment
 sce <- prepData(fs, panel, md, features = panel$fcs_colname)

Error in prepData(fs, panel, md, features = panel$fcs_colname) : 
  Couldn't match 'flowSet'/FCS filenames
with those listed in 'md[[md_cols$file]]'.

 sessionInfo()
R version 4.4.0 (2024-04-24)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 18.04.5 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1 
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1

Random number generation:
 RNG:     Mersenne-Twister 
 Normal:  Inversion 
 Sample:  Rounding 

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=zh_CN.UTF-8        LC_COLLATE=en_US.UTF-8     LC_MONETARY=zh_CN.UTF-8   
 [6] LC_MESSAGES=en_US.UTF-8    LC_PAPER=zh_CN.UTF-8       LC_NAME=C                  LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=zh_CN.UTF-8 LC_IDENTIFICATION=C       

time zone: America/Los_Angeles
tzcode source: system (glibc)

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] cytofWorkflow_1.28.0        uwot_0.2.2                  Matrix_1.7-0                HDCytoData_1.24.0           flowCore_2.16.0            
 [6] ExperimentHub_2.12.0        AnnotationHub_3.12.0        BiocFileCache_2.12.0        dbplyr_2.5.0                diffcyt_1.24.0             
[11] CATALYST_1.28.0             SingleCellExperiment_1.26.0 SummarizedExperiment_1.34.0 Biobase_2.64.0              GenomicRanges_1.56.0       
[16] GenomeInfoDb_1.40.1         IRanges_2.38.0              S4Vectors_0.42.0            BiocGenerics_0.50.0         MatrixGenerics_1.16.0      
[21] matrixStats_1.3.0           knitr_1.47                  BiocStyle_2.32.0            BiocManager_1.30.23         Seurat_5.1.0               
[26] SeuratObject_5.0.2          sp_2.1-4                    cowplot_1.1.3               ggplot2_3.5.1               readxl_1.4.3               

loaded via a namespace (and not attached):
  [1] spatstat.sparse_3.0-3       httr_1.4.7                  RColorBrewer_1.1-3          doParallel_1.0.17           tools_4.4.0                
  [6] sctransform_0.4.1           backports_1.5.0             utf8_1.2.4                  R6_2.5.1                    lazyeval_0.2.2             
 [11] GetoptLong_1.0.5            withr_3.0.0                 gridExtra_2.3               progressr_0.14.0            textshaping_0.4.0          
 [16] cli_3.6.2                   Cairo_1.6-2                 spatstat.explore_3.2-7      fastDummies_1.7.3           sandwich_3.1-0             
 [21] labeling_0.4.3              nnls_1.5                    mvtnorm_1.2-5               spatstat.data_3.0-4         ggridges_0.5.6             
 [26] pbapply_1.7-2               systemfonts_1.1.0           colorRamps_2.3.4            scater_1.32.0               parallelly_1.37.1          
 [31] plotrix_3.8-4               limma_3.60.2                RSQLite_2.3.7               rstudioapi_0.16.0           FNN_1.1.4                  
 [36] generics_0.1.3              shape_1.4.6.1               gtools_3.9.5                ica_1.0-3                   spatstat.random_3.2-3      
 [41] car_3.1-2                   dplyr_1.1.4                 RProtoBufLib_2.16.0         ggbeeswarm_0.7.2            fansi_1.0.6                
 [46] abind_1.4-5                 lifecycle_1.0.4             edgeR_4.2.0                 multcomp_1.4-25             yaml_2.3.8                 
 [51] carData_3.0-5               SparseArray_1.4.8           Rtsne_0.17                  blob_1.2.4                  grid_4.4.0                 
 [56] promises_1.3.0              crayon_1.5.2                miniUI_0.1.1.1              lattice_0.22-6              beachmat_2.20.0            
 [61] KEGGREST_1.44.0             pillar_1.9.0                ComplexHeatmap_2.20.0       boot_1.3-30                 rjson_0.2.21               
 [66] future.apply_1.11.2         codetools_0.2-20            leiden_0.4.3.1              glue_1.7.0                  data.table_1.15.4          
 [71] vctrs_0.6.5                 png_0.1-8                   spam_2.10-0                 cellranger_1.1.0            gtable_0.3.5               
 [76] cachem_1.1.0                xfun_0.44                   S4Arrays_1.4.1              mime_0.12                   ConsensusClusterPlus_1.68.0
 [81] survival_3.7-0              pheatmap_1.0.12             iterators_1.0.14            cytolib_2.16.0              statmod_1.5.0              
 [86] fitdistrplus_1.1-11         TH.data_1.1-2               ROCR_1.0-11                 nlme_3.1-165                bit64_4.0.5                
 [91] filelock_1.0.3              RcppAnnoy_0.0.22            irlba_2.3.5.1               vipor_0.4.7                 KernSmooth_2.23-24         
 [96] DBI_1.2.3                   colorspace_2.1-0            tidyselect_1.2.1            curl_5.2.1                  bit_4.0.5                  
[101] compiler_4.4.0              BiocNeighbors_1.22.0        DelayedArray_0.30.1         plotly_4.10.4               scales_1.3.0               
[106] lmtest_0.9-40               rappdirs_0.3.3              stringr_1.5.1               digest_0.6.35               goftest_1.2-3              
[111] minqa_1.2.7                 spatstat.utils_3.0-4        rmarkdown_2.27              XVector_0.44.0              htmltools_0.5.8.1          
[116] pkgconfig_2.0.3             lme4_1.1-35.3               sparseMatrixStats_1.16.0    fastmap_1.2.0               rlang_1.1.4                
[121] GlobalOptions_0.1.2         htmlwidgets_1.6.4           UCSC.utils_1.0.0            shiny_1.8.1.1               DelayedMatrixStats_1.26.0  
[126] farver_2.1.2                zoo_1.8-12                  jsonlite_1.8.8              BiocParallel_1.38.0         BiocSingular_1.20.0        
[131] magrittr_2.0.3              scuttle_1.14.0              GenomeInfoDbData_1.2.12     dotCall64_1.1-1             patchwork_1.2.0            
[136] munsell_0.5.1               Rcpp_1.0.12                 ggnewscale_0.4.10           viridis_0.6.5               reticulate_1.37.0          
[141] stringi_1.8.4               zlibbioc_1.50.0             MASS_7.3-60.2               plyr_1.8.9                  parallel_4.4.0             
[146] listenv_0.9.1               ggrepel_0.9.5               deldir_2.0-4                Biostrings_2.72.1           splines_4.4.0              
[151] tensor_1.5                  circlize_0.4.16             locfit_1.5-9.9              igraph_2.0.3                ggpubr_0.6.0               
[156] spatstat.geom_3.2-9         ggsignif_0.6.4              RcppHNSW_0.6.0              reshape2_1.4.4              ScaledMatrix_1.12.0        
[161] BiocVersion_3.19.1          XML_3.99-0.16.1             drc_3.0-1                   evaluate_0.24.0             nloptr_2.0.3               
[166] foreach_1.5.2               tweenr_2.0.3                httpuv_1.6.15               RANN_2.6.1                  tidyr_1.3.1                
[171] purrr_1.0.2                 polyclip_1.10-6             future_1.33.2               clue_0.3-65                 scattermore_1.2            
[176] ggforce_0.4.2               rsvd_1.0.5                  broom_1.0.6                 xtable_1.8-4                RSpectra_0.16-1            
[181] rstatix_0.7.2               later_1.3.2                 ragg_1.3.2                  viridisLite_0.4.2           tibble_3.2.1               
[186] AnnotationDbi_1.66.0        memoise_2.0.1               FlowSOM_2.12.0              beeswarm_0.4.0              cluster_2.1.6              
[191] globals_0.16.3
HelenaLC commented 4 months ago

The checks leading up to this are shown below ... i.e., either fsApply(fs, identifier) or keyword(fs, "FILENAME") should match what's listed in your md table. Please check what those return for your data and adjust the md table accordingly.

    # check that filenames or identifiers 
    # match b/w 'flowSet' & metadata
    ids0 <- md[[md_cols$file]]
    ids1 <- fsApply(fs, identifier)
    ids2 <- keyword(fs, "FILENAME")
    if (length(unlist(ids2)) == length(fs))
        ids2 <- basename(ids2)
    check1 <- all(ids1 %in% ids0)
    check2 <- all(ids2 %in% ids0)
    ids_use <- which(c(check1, check2))[1]
    ids <- list(ids1, ids2)[[ids_use]]
    if (is.null(ids)) {
        stop("Couldn't match 'flowSet'/FCS filenames\n", 
            "with those listed in 'md[[md_cols$file]]'.")
    } else {
        # reorder 'flowSet' frames according to metadata table
        fs <- fs[match(md[[md_cols$file]], ids)]
    }
yueli8 commented 4 months ago

The data should be downloaded from: https://zenodo.org/records/10039274/files