neurogenomics / EpiCompare

Comparison, benchmarking & QC of epigenetic datasets
https://doi.org/doi:10.18129/B9.bioc.EpiCompare
12 stars 3 forks source link

``Error in `.rowNamesDF<-`(x, value = value) : duplicate 'row.names' are not allowed`` #29

Closed bschilder closed 2 years ago

bschilder commented 2 years ago

Code

### Reproduction script: collect called-peak BED files, derive one name per
### file, read each file as a GRanges object, then run EpiCompare on the set.

### Data stored on Imperial HPC (mounted to local laptop)
root <- "/Volumes/bms20/projects/neurogenomics-lab/live/Data/tip_seq"

### Import the called peak files produced by the [nf-core/cutandrun](https://nf-co.re/cutandrun) pipeline.
# `recursive = TRUE` searches every subdirectory of processed_data, so every
# file whose path matches the pattern is returned, whichever folder it is in.
# NOTE(review): list.files() treats `pattern` as a regular expression, not a
# glob; the unescaped "." characters match any character.
peakpaths <- list.files(path = file.path(root,"processed_data"),
                        pattern = "*.peaks.bed.stringent.bed$", 
                        recursive = TRUE, 
                        full.names = TRUE)

# Name for each file = "<directory three levels above the file>.<part of the
# file name before its first '.'>".
# NOTE(review): the name ignores the two directories closest to the file, so
# the same sample found in two different subfolders of one run gets the same
# name. Nothing below checks `names` for duplicates.
# NOTE(review): this variable shadows base::names() as a variable name.
names <- paste(basename(dirname(dirname(dirname(peakpaths)))),
               stringr::str_split(basename(peakpaths),"[.]", simplify = TRUE)[,1], 
               sep=".")
# Read every peak file as a GRanges object, printing each path as it is read,
# then label the list elements with the names built above.
# NOTE(review): `%>%` is not base R; it presumably comes from dplyr/magrittr
# being attached earlier in the session -- confirm.
peakfiles <- lapply(peakpaths, function(x){
  message(x)
  ChIPseeker::readPeakFile(x, as = "GRanges")
}) %>% `names<-`(names)

### Run EpiCompare
library(EpiCompare)
# Load the example datasets. Only encode_H3K27ac and hg19_blacklist are used in
# the call below; CnT_H3K27ac and CnR_H3K27ac are loaded but not referenced.
data("encode_H3K27ac") # example peakfile
data("CnT_H3K27ac") # example peakfile
data("CnR_H3K27ac") # example peakfile
data("hg19_blacklist") # example blacklist 

# Disabled alternative: prepend the ENCODE example to the inputs.
# peaklist <- list(encode_H3K27ac, peakfiles) 
# namelist <- c("encode", names)
# Run the comparison with all optional plots switched on.
# NOTE(review): presumably writes its output into the current working
# directory via output_dir = "./" -- confirm against the EpiCompare docs.
out <- EpiCompare(peakfiles = peakfiles,
           names = names,
           blacklist = hg19_blacklist,
           reference = encode_H3K27ac,
           stat_plot = TRUE,
           chrmHMM_plot = TRUE,
           chipseeker_plot = TRUE,
           enrichment_plot = TRUE,
           save_output = TRUE,
           output_dir = "./")

Errors

``` Warning in .Seqinfo.mergexy(x, y) : Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr11_KI270721v1_random, chr14_GL000009v2_random, chr14_GL000194v1_random, chr14_GL000225v1_random, chr14_KI270726v1_random, chr15_KI270727v1_random, chr16_KI270728v1_random, chr17_GL000205v2_random, chr1_KI270706v1_random, chr1_KI270707v1_random, chr1_KI270711v1_random, chr1_KI270713v1_random, chr1_KI270714v1_random, chr22_KI270731v1_random, chr22_KI270733v1_random, chr22_KI270734v1_random, chr3_GL000221v1_random, chr4_GL000008v2_random, chr9_KI270719v1_random, chr9_KI270720v1_random, chrM, chrUn_GL000195v1, chrUn_GL000214v1, chrUn_GL000216v2, chrUn_GL000218v1, chrUn_GL000219v1, chrUn_GL000220v1, chrUn_GL000224v1, chrUn_KI270303v1, chrUn_KI270310v1, chrUn_KI270337v1, chrUn_KI270438v1, chrUn_KI270442v1, chrUn_KI270467v1, chrUn_KI270741v1, chrUn_KI270742v1, chrUn_KI270743v1, chrUn_KI270744v1, chrUn_KI270745v1, chrUn_KI270748v1, chrUn_KI270751v1, chrUn_KI270754v1 - in 'y': chrY Make sure to always combin [... 
truncated] Warning in .Seqinfo.mergexy(x, y) : Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr11_KI270721v1_random, chr14_GL000009v2_random, chr14_GL000194v1_random, chr14_GL000225v1_random, chr14_KI270726v1_random, chr15_KI270727v1_random, chr16_KI270728v1_random, chr17_GL000205v2_random, chr1_KI270706v1_random, chr1_KI270707v1_random, chr1_KI270711v1_random, chr1_KI270713v1_random, chr1_KI270714v1_random, chr22_KI270731v1_random, chr22_KI270733v1_random, chr22_KI270734v1_random, chr3_GL000221v1_random, chr4_GL000008v2_random, chr9_KI270719v1_random, chr9_KI270720v1_random, chrM, chrUn_GL000195v1, chrUn_GL000214v1, chrUn_GL000216v2, chrUn_GL000218v1, chrUn_GL000219v1, chrUn_GL000220v1, chrUn_GL000224v1, chrUn_KI270303v1, chrUn_KI270310v1, chrUn_KI270337v1, chrUn_KI270438v1, chrUn_KI270442v1, chrUn_KI270467v1, chrUn_KI270741v1, chrUn_KI270742v1, chrUn_KI270743v1, chrUn_KI270744v1, chrUn_KI270745v1, chrUn_KI270748v1, chrUn_KI270751v1, chrUn_KI270754v1 - in 'y': chrY Make sure to always combin [... truncated] Warning in .Seqinfo.mergexy(x, y) : Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr11_KI270721v1_random, chr14_GL000225v1_random, chr15_KI270727v1_random, chr16_KI270728v1_random, chr17_GL000205v2_random, chr1_KI270712v1_random, chr1_KI270713v1_random, chr22_KI270731v1_random, chr22_KI270733v1_random, chr22_KI270734v1_random, chr9_KI270718v1_random, chr9_KI270719v1_random, chrM, chrUn_GL000195v1, chrUn_GL000214v1, chrUn_GL000219v1, chrUn_GL000220v1, chrUn_GL000224v1, chrUn_KI270310v1, chrUn_KI270330v1, chrUn_KI270438v1, chrUn_KI270743v1, chrUn_KI270744v1 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning). 
Warning in .Seqinfo.mergexy(x, y) : Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr14_GL000009v2_random, chr14_GL000194v1_random, chr14_GL000225v1_random, chr14_KI270726v1_random, chr15_KI270727v1_random, chr16_KI270728v1_random, chr17_GL000205v2_random, chr1_KI270706v1_random, chr1_KI270711v1_random, chr1_KI270713v1_random, chr1_KI270714v1_random, chr22_KI270731v1_random, chr22_KI270733v1_random, chr22_KI270734v1_random, chr3_GL000221v1_random, chr4_GL000008v2_random, chrM, chrUn_GL000195v1, chrUn_GL000214v1, chrUn_GL000216v2, chrUn_GL000218v1, chrUn_GL000219v1, chrUn_GL000220v1, chrUn_GL000224v1, chrUn_KI270442v1, chrUn_KI270741v1, chrUn_KI270742v1, chrUn_KI270743v1, chrUn_KI270744v1, chrUn_KI270745v1, chrUn_KI270751v1, chrUn_KI270754v1 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning). Warning in .Seqinfo.mergexy(x, y) : Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr11_KI270721v1_random, chr14_GL000225v1_random, chr15_KI270727v1_random, chr16_KI270728v1_random, chr17_GL000205v2_random, chr1_KI270712v1_random, chr1_KI270713v1_random, chr22_KI270731v1_random, chr22_KI270733v1_random, chr22_KI270734v1_random, chr9_KI270718v1_random, chr9_KI270719v1_random, chrM, chrUn_GL000195v1, chrUn_GL000214v1, chrUn_GL000219v1, chrUn_GL000220v1, chrUn_GL000224v1, chrUn_KI270310v1, chrUn_KI270330v1, chrUn_KI270438v1, chrUn_KI270743v1, chrUn_KI270744v1 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning). 
Warning in .Seqinfo.mergexy(x, y) : Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr14_GL000009v2_random, chr14_GL000194v1_random, chr14_GL000225v1_random, chr14_KI270726v1_random, chr15_KI270727v1_random, chr16_KI270728v1_random, chr17_GL000205v2_random, chr1_KI270706v1_random, chr1_KI270711v1_random, chr1_KI270713v1_random, chr1_KI270714v1_random, chr22_KI270731v1_random, chr22_KI270733v1_random, chr22_KI270734v1_random, chr3_GL000221v1_random, chr4_GL000008v2_random, chrM, chrUn_GL000195v1, chrUn_GL000214v1, chrUn_GL000216v2, chrUn_GL000218v1, chrUn_GL000219v1, chrUn_GL000220v1, chrUn_GL000224v1, chrUn_KI270442v1, chrUn_KI270741v1, chrUn_KI270742v1, chrUn_KI270743v1, chrUn_KI270744v1, chrUn_KI270745v1, chrUn_KI270751v1, chrUn_KI270754v1 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning). Warning in .Seqinfo.mergexy(x, y) : Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr11_KI270721v1_random, chr14_GL000009v2_random, chr14_GL000194v1_random, chr14_GL000225v1_random, chr14_KI270726v1_random, chr15_KI270727v1_random, chr16_KI270728v1_random, chr17_GL000205v2_random, chr1_KI270706v1_random, chr1_KI270707v1_random, chr1_KI270711v1_random, chr1_KI270713v1_random, chr1_KI270714v1_random, chr22_KI270731v1_random, chr22_KI270733v1_random, chr22_KI270734v1_random, chr3_GL000221v1_random, chr4_GL000008v2_random, chr9_KI270719v1_random, chr9_KI270720v1_random, chrM, chrUn_GL000195v1, chrUn_GL000214v1, chrUn_GL000216v2, chrUn_GL000218v1, chrUn_GL000219v1, chrUn_GL000220v1, chrUn_GL000224v1, chrUn_KI270303v1, chrUn_KI270310v1, chrUn_KI270337v1, chrUn_KI270438v1, chrUn_KI270442v1, chrUn_KI270467v1, chrUn_KI270741v1, chrUn_KI270742v1, chrUn_KI270743v1, chrUn_KI270744v1, chrUn_KI270745v1, chrUn_KI270748v1, chrUn_KI270751v1, chrUn_KI270754v1 - in 'y': chrY Make sure to always combin [... 
truncated] Warning in .Seqinfo.mergexy(x, y) : Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr11_KI270721v1_random, chr14_GL000009v2_random, chr14_GL000194v1_random, chr14_GL000225v1_random, chr14_KI270726v1_random, chr15_KI270727v1_random, chr16_KI270728v1_random, chr17_GL000205v2_random, chr1_KI270706v1_random, chr1_KI270707v1_random, chr1_KI270711v1_random, chr1_KI270713v1_random, chr1_KI270714v1_random, chr22_KI270731v1_random, chr22_KI270733v1_random, chr22_KI270734v1_random, chr3_GL000221v1_random, chr4_GL000008v2_random, chr9_KI270719v1_random, chr9_KI270720v1_random, chrM, chrUn_GL000195v1, chrUn_GL000214v1, chrUn_GL000216v2, chrUn_GL000218v1, chrUn_GL000219v1, chrUn_GL000220v1, chrUn_GL000224v1, chrUn_KI270303v1, chrUn_KI270310v1, chrUn_KI270337v1, chrUn_KI270438v1, chrUn_KI270442v1, chrUn_KI270467v1, chrUn_KI270741v1, chrUn_KI270742v1, chrUn_KI270743v1, chrUn_KI270744v1, chrUn_KI270745v1, chrUn_KI270748v1, chrUn_KI270751v1, chrUn_KI270754v1 - in 'y': chrY Make sure to always combin [... truncated] Warning in .Seqinfo.mergexy(x, y) : Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr11_KI270721v1_random, chr14_GL000225v1_random, chr15_KI270727v1_random, chr16_KI270728v1_random, chr17_GL000205v2_random, chr1_KI270712v1_random, chr1_KI270713v1_random, chr22_KI270731v1_random, chr22_KI270733v1_random, chr22_KI270734v1_random, chr9_KI270718v1_random, chr9_KI270719v1_random, chrM, chrUn_GL000195v1, chrUn_GL000214v1, chrUn_GL000219v1, chrUn_GL000220v1, chrUn_GL000224v1, chrUn_KI270310v1, chrUn_KI270330v1, chrUn_KI270438v1, chrUn_KI270743v1, chrUn_KI270744v1 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning). 
Warning in .Seqinfo.mergexy(x, y) : Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr14_GL000009v2_random, chr14_GL000194v1_random, chr14_GL000225v1_random, chr14_KI270726v1_random, chr15_KI270727v1_random, chr16_KI270728v1_random, chr17_GL000205v2_random, chr1_KI270706v1_random, chr1_KI270711v1_random, chr1_KI270713v1_random, chr1_KI270714v1_random, chr22_KI270731v1_random, chr22_KI270733v1_random, chr22_KI270734v1_random, chr3_GL000221v1_random, chr4_GL000008v2_random, chrM, chrUn_GL000195v1, chrUn_GL000214v1, chrUn_GL000216v2, chrUn_GL000218v1, chrUn_GL000219v1, chrUn_GL000220v1, chrUn_GL000224v1, chrUn_KI270442v1, chrUn_KI270741v1, chrUn_KI270742v1, chrUn_KI270743v1, chrUn_KI270744v1, chrUn_KI270745v1, chrUn_KI270751v1, chrUn_KI270754v1 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning). Warning in .Seqinfo.mergexy(x, y) : Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr11_KI270721v1_random, chr14_GL000225v1_random, chr15_KI270727v1_random, chr16_KI270728v1_random, chr17_GL000205v2_random, chr1_KI270712v1_random, chr1_KI270713v1_random, chr22_KI270731v1_random, chr22_KI270733v1_random, chr22_KI270734v1_random, chr9_KI270718v1_random, chr9_KI270719v1_random, chrM, chrUn_GL000195v1, chrUn_GL000214v1, chrUn_GL000219v1, chrUn_GL000220v1, chrUn_GL000224v1, chrUn_KI270310v1, chrUn_KI270330v1, chrUn_KI270438v1, chrUn_KI270743v1, chrUn_KI270744v1 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning). 
Warning in .Seqinfo.mergexy(x, y) : Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr14_GL000009v2_random, chr14_GL000194v1_random, chr14_GL000225v1_random, chr14_KI270726v1_random, chr15_KI270727v1_random, chr16_KI270728v1_random, chr17_GL000205v2_random, chr1_KI270706v1_random, chr1_KI270711v1_random, chr1_KI270713v1_random, chr1_KI270714v1_random, chr22_KI270731v1_random, chr22_KI270733v1_random, chr22_KI270734v1_random, chr3_GL000221v1_random, chr4_GL000008v2_random, chrM, chrUn_GL000195v1, chrUn_GL000214v1, chrUn_GL000216v2, chrUn_GL000218v1, chrUn_GL000219v1, chrUn_GL000220v1, chrUn_GL000224v1, chrUn_KI270442v1, chrUn_KI270741v1, chrUn_KI270742v1, chrUn_KI270743v1, chrUn_KI270744v1, chrUn_KI270745v1, chrUn_KI270751v1, chrUn_KI270754v1 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning). Saving 7 x 5 in image Warning: non-unique values when setting 'row.names': 'phase_1_05_jan_2022.S_1_R1', 'phase_1_05_jan_2022.S_1_R2', 'phase_2_03_feb_2022.S_4_R1', 'phase_2_28_jan_2022.S_2_R1', 'phase_2_28_jan_2022.S_3_R1', 'phase_2_28_jan_2022.S_4_R1', 'phase_2_28_jan_2022.S_5_R1', 'phase_2_28_jan_2022.S_6_R1' Quitting from lines 167-179 (EpiCompare.Rmd) Error in `.rowNamesDF<-`(x, value = value) : duplicate 'row.names' are not allowed ```

Session info

``` R version 4.1.0 (2021-05-18) Platform: x86_64-apple-darwin17.0 (64-bit) Running under: macOS Big Sur 11.4 Matrix products: default LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib locale: [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8 attached base packages: [1] stats4 stats graphics grDevices utils datasets methods [8] base other attached packages: [1] org.Hs.eg.db_3.14.0 AnnotationDbi_1.56.2 Biobase_2.54.0 [4] GenomicRanges_1.46.1 GenomeInfoDb_1.30.1 IRanges_2.28.0 [7] S4Vectors_0.32.3 BiocGenerics_0.40.0 dplyr_1.0.8 [10] EpiCompare_0.99.0 loaded via a namespace (and not attached): [1] utf8_1.2.2 [2] tidyselect_1.1.1 [3] heatmaply_1.3.0 [4] RSQLite_2.2.9 [5] htmlwidgets_1.5.4 [6] grid_4.1.0 [7] TSP_1.1-11 [8] BiocParallel_1.28.3 [9] scatterpie_0.1.7 [10] munsell_0.5.0 [11] codetools_0.2-18 [12] ragg_1.2.1 [13] colorspace_2.0-2 [14] GOSemSim_2.20.0 [15] filelock_1.0.2 [16] highr_0.9 [17] knitr_1.37 [18] rstudioapi_0.13 [19] ggsignif_0.6.3 [20] DOSE_3.20.1 [21] MatrixGenerics_1.6.0 [22] labeling_0.4.2 [23] GenomeInfoDbData_1.2.7 [24] polyclip_1.10-0 [25] seqPattern_1.26.0 [26] bit64_4.0.5 [27] farver_2.1.0 [28] downloader_0.4 [29] rprojroot_2.0.2 [30] treeio_1.18.1 [31] vctrs_0.3.8 [32] generics_0.1.2 [33] xfun_0.29 [34] BiocFileCache_2.2.1 [35] R6_2.5.1 [36] graphlayouts_0.8.0 [37] seriation_1.3.2 [38] locfit_1.5-9.4 [39] gridGraphics_0.5-1 [40] fgsea_1.20.0 [41] bitops_1.0-7 [42] BRGenomics_1.6.0 [43] cachem_1.0.6 [44] DelayedArray_0.20.0 [45] assertthat_0.2.1 [46] BiocIO_1.4.0 [47] scales_1.1.1 [48] vroom_1.5.7 [49] ggraph_2.0.5 [50] enrichplot_1.14.1 [51] gtable_0.3.0 [52] tidygraph_1.2.0 [53] rlang_1.0.1 [54] genefilter_1.76.0 [55] systemfonts_1.0.3 [56] splines_4.1.0 [57] rtracklayer_1.54.0 [58] rstatix_0.7.0 [59] lazyeval_0.2.2 [60] impute_1.68.0 [61] broom_0.7.12 [62] yaml_2.2.2 [63] reshape2_1.4.4 [64] abind_1.4-5 [65] GenomicFeatures_1.46.4 [66] crosstalk_1.2.0 [67] backports_1.4.1 [68] 
qvalue_2.26.0 [69] clusterProfiler_4.2.2 [70] tools_4.1.0 [71] ggplotify_0.1.0 [72] gridBase_0.4-7 [73] ggplot2_3.3.5 [74] gplots_3.1.1 [75] ellipsis_0.3.2 [76] jquerylib_0.1.4 [77] RColorBrewer_1.1-2 [78] Rcpp_1.0.8 [79] plyr_1.8.6 [80] progress_1.2.2 [81] zlibbioc_1.40.0 [82] purrr_0.3.4 [83] RCurl_1.98-1.6 [84] prettyunits_1.1.1 [85] ggpubr_0.4.0 [86] viridis_0.6.2 [87] SummarizedExperiment_1.24.0 [88] ggrepel_0.9.1 [89] here_1.0.1 [90] magrittr_2.0.2 [91] data.table_1.14.2 [92] DO.db_2.9 [93] matrixStats_0.61.0 [94] patchwork_1.1.1 [95] hms_1.1.1 [96] evaluate_0.14 [97] xtable_1.8-4 [98] XML_3.99-0.8 [99] gridExtra_2.3 [100] compiler_4.1.0 [101] biomaRt_2.50.3 [102] tibble_3.1.6 [103] shadowtext_0.1.1 [104] KernSmooth_2.23-20 [105] crayon_1.5.0 [106] htmltools_0.5.2 [107] ggfun_0.0.5 [108] tzdb_0.2.0 [109] aplot_0.1.2 [110] tidyr_1.2.0 [111] geneplotter_1.72.0 [112] DBI_1.1.2 [113] tweenr_1.0.2 [114] genomation_1.26.0 [115] ChIPseeker_1.30.3 [116] dbplyr_2.1.1 [117] MASS_7.3-55 [118] rappdirs_0.3.3 [119] boot_1.3-28 [120] Matrix_1.4-0 [121] car_3.0-12 [122] readr_2.1.2 [123] cli_3.2.0 [124] parallel_4.1.0 [125] igraph_1.2.11 [126] pkgconfig_2.0.3 [127] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2 [128] GenomicAlignments_1.30.0 [129] registry_0.5-1 [130] plotly_4.10.0 [131] xml2_1.3.3 [132] foreach_1.5.2 [133] ggtree_3.2.1 [134] annotate_1.72.0 [135] bslib_0.3.1 [136] webshot_0.5.2 [137] XVector_0.34.0 [138] yulab.utils_0.0.4 [139] stringr_1.4.0 [140] digest_0.6.29 [141] Biostrings_2.62.0 [142] fastmatch_1.1-3 [143] rmarkdown_2.11 [144] tidytree_0.3.7 [145] dendextend_1.15.2 [146] restfulr_0.0.13 [147] curl_4.3.2 [148] gtools_3.9.2 [149] Rsamtools_2.10.0 [150] rjson_0.2.21 [151] nlme_3.1-155 [152] lifecycle_1.0.1 [153] jsonlite_1.7.3 [154] carData_3.0-5 [155] viridisLite_0.4.0 [156] BSgenome_1.62.0 [157] fansi_1.0.2 [158] pillar_1.7.0 [159] lattice_0.20-45 [160] KEGGREST_1.34.0 [161] fastmap_1.1.0 [162] httr_1.4.2 [163] plotrix_3.8-2 [164] survival_3.2-13 [165] 
GO.db_3.14.0 [166] glue_1.6.1 [167] remotes_2.4.2 [168] png_0.1-7 [169] iterators_1.0.14 [170] bit_4.0.4 [171] ggforce_0.3.3 [172] stringi_1.7.6 [173] sass_0.4.0 [174] blob_1.2.2 [175] textshaping_0.3.6 [176] DESeq2_1.34.0 [177] caTools_1.18.2 [178] memoise_2.0.1 [179] ape_5.6-1 ```
bschilder commented 2 years ago

Not sure exactly where this is happening, but make.unique() might come in handy.

bschilder commented 2 years ago

OK, I see what happened. Duplicate peak files for the same samples are located in both `03_peak_calling/04_called_peaks/` and `04_reporting/igv/`.

After omitting files from the latter folder, all names were unique.

Still, it would be good to have a check at the beginning of your pipeline that verifies whether all names are unique and, if they are not, either throws an error or simply issues a warning saying that the names will be forced to be unique.