Closed Deleetdk closed 1 year ago
I cannot acquire the pos2 because of some missing file error:
pos2
> POS2 <- snp_asGeneticPos(obj.bigSNP$map$chromosome, obj.bigSNP$map$physical.pos, dir = "tmp-data", ncores = NCORES) Error in { : task 1 failed - "No such file: tmp-data/chr2.OMNI.interpolated_genetic_map.gz"
I am just following the tutorial https://privefl.github.io/bigsnpr/articles/LDpred2.html except with my own dataset appropriately subsetted.
Here's my complete code for the LDpred2 section:
> #remotes::install_github("privefl/bigsnpr") > library(bigsnpr) Loading required package: bigstatsr Attaching package: ‘bigstatsr’ The following object is masked from ‘package:psych’: AUC > #variants in dataset > pvar_cols = cols( + `#CHROM` = col_double(), + POS = col_double(), + ID = col_character(), + REF = col_character(), + ALT = col_character(), + FILTER = col_character(), + INFO = col_character() + ) > dataset_pvar = read_table("dataset_v3.pvar", comment = "##", col_types = pvar_cols) |===================================================================================================================================================================================================| 100% 930 MB > #direct and imputed count > nrow(dataset_pvar) [1] 13038731 > #direct only > dataset_pvar_direct = dataset_pvar %>% filter(str_detect(INFO, "TYPED")) > dataset_pvar_direct %>% nrow() [1] 473624 > #hapmap3+ > hapmap3p = read_rds("/media/emil/luks8tb1/data/genomics/reference_files/ld_matrices/LD reference for HapMap3+/map_hm3_plus.rds") > #add chrpos38 > hapmap3p %<>% mutate( + chrpos = chr + ":" + pos_hg38 + ) > #overlap > dataset_hm3p_intersect = intersect(hapmap3p$chrpos, dataset_pvar$ID) > length(dataset_hm3p_intersect) [1] 1094701 > #ea4 gwas > ea4 = read_tsv("/media/emil/luks8tb1/data/genomics/gwas/EA4/ea4_top9k_hg38.tsv", col_types = cols(chrpos = col_character(), chrpos38 = col_character())) > nrow(ea4) [1] 8618 > #intersect of all 3 > ea4_intersect = intersect(ea4$chrpos38, dataset_hm3p_intersect) > length(ea4_intersect) [1] 1662 > #prep dataset data to other format > # snp_readBed2("dataset_v3.bed", ind.row = which(idx_in_G_with_data)) > obj.bigSNP <- snp_attach("dataset_v3.rds") > #determine which persons we need to prep genetic data for > dataset_psam = read_table("dataset_v3.psam") ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── cols( `#IID` = col_character(), SEX = col_logical() ) > ids_with_data = intersect(dataset_psam$`#IID`, d_cog$individual_id) > idx_in_G_with_data = obj.bigSNP$fam$sample.ID %in% ids_with_data > length(ids_with_data) [1] 968 > # Get aliases for useful slots > G <- obj.bigSNP$genotypes > CHR <- obj.bigSNP$map$chromosome > POS <- obj.bigSNP$map$physical.pos > y <- tibble(individual_id = ids_with_data) %>% left_join( + mirt_fit_scores, + by = "individual_id" + ) %>% pull(g) > (NCORES <- nb_cores()) [1] 31 > assert_that(length(y) == length(ids_with_data)) [1] TRUE > #split into train and test > set.seed(1) > ind.val <- sample(nrow(obj.bigSNP$genotypes), 500) > ind.test <- setdiff(1:nrow(obj.bigSNP$genotypes), ind.val) > length(ind.val) [1] 500 > length(ind.test) [1] 468 > #match snps > map <- setNames(obj.bigSNP$map[-3], c("chr", "rsid", "pos", "a1", "a0")) > #make ea4 with right cols > ea4_mod = ea4 %>% select(Chr, chrpos38, Other_allele, Effect_allele, Beta, SE) %>% mutate(n_eff = 3e6, chrpos38 = str_replace(chrpos38, "\\d:", "") %>% as.integer()) %>% set_names(c("chr", "pos", "a0", "a1", "beta", "beta_se", "n_neff")) > #align variants and alleles > df_beta <- snp_match(ea4_mod, map) 8,618 variants to be matched. 668 ambiguous SNPs have been removed. 3,309 variants have been matched; 1 were flipped and 1,517 were reversed. > #correlations between variants > POS2 <- snp_asGeneticPos(obj.bigSNP$map$chromosome, obj.bigSNP$map$physical.pos, dir = "tmp-data", ncores = NCORES) Error in { : task 1 failed - "No such file: tmp-data/chr2.OMNI.interpolated_genetic_map.gz"
I am using newest R and bigsnpr:
> sessionInfo() R version 4.2.3 (2023-03-15) Platform: x86_64-pc-linux-gnu (64-bit) Running under: Linux Mint 21.1 Matrix products: default BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0 locale: [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_DK.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_DK.UTF-8 [8] LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C attached base packages: [1] stats4 stats graphics grDevices utils datasets methods base other attached packages: [1] bigsnpr_1.12.2 bigstatsr_1.5.12 doFuture_1.0.0 future_1.32.0 foreach_1.5.2 tictoc_1.1 rms_6.6-0 mirt_1.38.1 lattice_0.20-45 [10] readxl_1.4.2 kirkegaard_2023-04-25 psych_2.3.3 assertthat_0.2.1 weights_1.0.4 Hmisc_5.0-1 magrittr_2.0.3 lubridate_1.9.2 forcats_1.0.0 [19] stringr_1.5.0 dplyr_1.1.2 purrr_1.0.1 readr_2.1.4 tidyr_1.3.0 tibble_3.2.1 ggplot2_3.4.2 tidyverse_2.0.0 loaded via a namespace (and not attached): [1] TH.data_1.1-2 minqa_1.2.5 colorspace_2.1-0 htmlTable_2.4.1 base64enc_0.1-3 rstudioapi_0.14 mice_3.15.0 farver_2.1.1 listenv_0.9.0 [10] Deriv_4.1.3 MatrixModels_0.5-1 bit64_4.0.5 fansi_1.0.4 mvtnorm_1.1-3 xml2_1.3.3 R.methodsS3_1.8.2 codetools_0.2-19 splines_4.2.3 [19] doParallel_1.0.17 mnormt_2.1.1 knitr_1.42 Formula_1.2-5 nloptr_2.0.3 broom_1.0.4 cluster_2.1.4 R.oo_1.25.0 bigparallelr_0.3.2 [28] compiler_4.2.3 httr_1.4.5 backports_1.4.1 Matrix_1.5-4 fastmap_1.1.1 cli_3.6.1 admisc_0.31 htmltools_0.5.5 quantreg_5.95 [37] tools_4.2.3 gtable_0.3.3 glue_1.6.2 doRNG_1.8.6 Rcpp_1.0.10 cellranger_1.1.0 vctrs_0.6.2 gdata_2.18.0.1 svglite_2.1.1 [46] nlme_3.1-162 iterators_1.0.14 xfun_0.39 globals_0.16.2 lme4_1.1-32 rvest_1.0.3 timechange_0.2.0 lifecycle_1.0.3 dcurver_0.9.2 [55] rngtools_1.5.2 gtools_3.9.4 polspline_1.1.22 MASS_7.3-58.3 zoo_1.8-12 scales_1.2.1 vroom_1.6.1 flock_0.7 hms_1.1.3 [64] parallel_4.2.3 sandwich_3.0-2 SparseM_1.81 pbapply_1.7-0 gridExtra_2.3 rpart_4.1.19 stringi_1.7.12 checkmate_2.1.0 permute_0.9-7 [73] bigassertr_0.1.6 polycor_0.8-1 boot_1.3-28 rlang_1.1.0 pkgconfig_2.0.3 systemfonts_1.0.4 evaluate_0.20 labeling_0.4.2 htmlwidgets_1.6.2 [82] cowplot_1.1.1 bit_4.0.5 tidyselect_1.2.0 parallelly_1.35.0 bigsparser_0.6.1 R6_2.5.1 generics_0.1.3 multcomp_1.4-23 pillar_1.9.0 [91] foreign_0.8-82 withr_2.5.0 mgcv_1.8-42 survival_3.5-3 nnet_7.3-18 future.apply_1.10.0 crayon_1.5.2 utf8_1.2.3 tzdb_0.3.0 [100] rmarkdown_2.21 grid_4.2.3 data.table_1.14.8 vegan_2.6-4 digest_0.6.31 webshot_0.5.4 GPArotation_2023.3-1 R.utils_2.12.2 munsell_0.5.0 [109] viridisLite_0.4.1 kableExtra_1.3.4
I think your issue is similar as https://github.com/privefl/bigsnpr/issues/83.
Any update on this?
I cannot acquire the
pos2
because of some missing file error:I am just following the tutorial https://privefl.github.io/bigsnpr/articles/LDpred2.html except with my own dataset appropriately subsetted.
Here's my complete code for the LDpred2 section:
I am using newest R and bigsnpr: