privefl / bigsnpr

R package for the analysis of massive SNP arrays.
https://privefl.github.io/bigsnpr/
186 stars 44 forks source link

No such file: tmp-data/chr2.OMNI.interpolated_genetic_map.gz #422

Closed Deleetdk closed 1 year ago

Deleetdk commented 1 year ago

I cannot acquire the pos2 because of some missing file error:

> POS2 <- snp_asGeneticPos(obj.bigSNP$map$chromosome, obj.bigSNP$map$physical.pos, dir = "tmp-data", ncores = NCORES)
Error in { : 
  task 1 failed - "No such file: tmp-data/chr2.OMNI.interpolated_genetic_map.gz"

I am just following the tutorial https://privefl.github.io/bigsnpr/articles/LDpred2.html except with my own dataset appropriately subsetted.

Here's my complete code for the LDpred2 section:

> #remotes::install_github("privefl/bigsnpr")
> library(bigsnpr)
Loading required package: bigstatsr

Attaching package: ‘bigstatsr’

The following object is masked from ‘package:psych’:

    AUC

> #variants in dataset
> pvar_cols = cols(
+   `#CHROM` = col_double(),
+   POS = col_double(),
+   ID = col_character(),
+   REF = col_character(),
+   ALT = col_character(),
+   FILTER = col_character(),
+   INFO = col_character()
+ )
> dataset_pvar = read_table("dataset_v3.pvar", comment = "##", col_types = pvar_cols)
|===================================================================================================================================================================================================| 100% 930 MB
> #direct and imputed count
> nrow(dataset_pvar)
[1] 13038731
> #direct only
> dataset_pvar_direct = dataset_pvar %>% filter(str_detect(INFO, "TYPED"))
> dataset_pvar_direct %>% nrow()
[1] 473624
> #hapmap3+
> hapmap3p = read_rds("/media/emil/luks8tb1/data/genomics/reference_files/ld_matrices/LD reference for HapMap3+/map_hm3_plus.rds")
> #add chrpos38
> hapmap3p %<>% mutate(
+   chrpos = chr + ":" + pos_hg38
+ )
> #overlap
> dataset_hm3p_intersect = intersect(hapmap3p$chrpos, dataset_pvar$ID)
> length(dataset_hm3p_intersect)
[1] 1094701
> #ea4 gwas
> ea4 = read_tsv("/media/emil/luks8tb1/data/genomics/gwas/EA4/ea4_top9k_hg38.tsv", col_types = cols(chrpos = col_character(), chrpos38 = col_character()))
> nrow(ea4)                                                                                                                                                                                                                                         
[1] 8618
> #intersect of all 3
> ea4_intersect = intersect(ea4$chrpos38, dataset_hm3p_intersect)
> length(ea4_intersect)
[1] 1662
> #prep dataset data to other format
> # snp_readBed2("dataset_v3.bed", ind.row = which(idx_in_G_with_data))
> obj.bigSNP <- snp_attach("dataset_v3.rds")
> #determine which persons we need to prep genetic data for
> dataset_psam = read_table("dataset_v3.psam")

── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  `#IID` = col_character(),
  SEX = col_logical()
)

> ids_with_data = intersect(dataset_psam$`#IID`, d_cog$individual_id)
> idx_in_G_with_data = obj.bigSNP$fam$sample.ID %in% ids_with_data
> length(ids_with_data)
[1] 968
> # Get aliases for useful slots
> G   <- obj.bigSNP$genotypes
> CHR <- obj.bigSNP$map$chromosome
> POS <- obj.bigSNP$map$physical.pos
> y   <- tibble(individual_id = ids_with_data) %>% left_join(
+   mirt_fit_scores,
+   by = "individual_id"
+ ) %>% pull(g)
> (NCORES <- nb_cores())
[1] 31
> assert_that(length(y) == length(ids_with_data))
[1] TRUE
> #split into train and test
> set.seed(1)
> ind.val <- sample(nrow(obj.bigSNP$genotypes), 500)
> ind.test <- setdiff(1:nrow(obj.bigSNP$genotypes), ind.val)
> length(ind.val)
[1] 500
> length(ind.test)
[1] 468
> #match snps
> map <- setNames(obj.bigSNP$map[-3], c("chr", "rsid", "pos", "a1", "a0"))
> #make ea4 with right cols
> ea4_mod = ea4 %>% select(Chr, chrpos38, Other_allele, Effect_allele, Beta, SE) %>% mutate(n_eff = 3e6, chrpos38 = str_replace(chrpos38, "\\d:", "") %>% as.integer()) %>% set_names(c("chr", "pos", "a0", "a1", "beta", "beta_se", "n_neff"))
> #align variants and alleles
> df_beta <- snp_match(ea4_mod, map)
8,618 variants to be matched.
668 ambiguous SNPs have been removed.
3,309 variants have been matched; 1 were flipped and 1,517 were reversed.
> #correlations between variants
> POS2 <- snp_asGeneticPos(obj.bigSNP$map$chromosome, obj.bigSNP$map$physical.pos, dir = "tmp-data", ncores = NCORES)
Error in { : 
  task 1 failed - "No such file: tmp-data/chr2.OMNI.interpolated_genetic_map.gz"

I am using newest R and bigsnpr:

> sessionInfo()
R version 4.2.3 (2023-03-15)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Linux Mint 21.1

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8     LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=en_DK.UTF-8      
 [8] LC_NAME=C                  LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] bigsnpr_1.12.2        bigstatsr_1.5.12      doFuture_1.0.0        future_1.32.0         foreach_1.5.2         tictoc_1.1            rms_6.6-0             mirt_1.38.1           lattice_0.20-45      
[10] readxl_1.4.2          kirkegaard_2023-04-25 psych_2.3.3           assertthat_0.2.1      weights_1.0.4         Hmisc_5.0-1           magrittr_2.0.3        lubridate_1.9.2       forcats_1.0.0        
[19] stringr_1.5.0         dplyr_1.1.2           purrr_1.0.1           readr_2.1.4           tidyr_1.3.0           tibble_3.2.1          ggplot2_3.4.2         tidyverse_2.0.0      

loaded via a namespace (and not attached):
  [1] TH.data_1.1-2        minqa_1.2.5          colorspace_2.1-0     htmlTable_2.4.1      base64enc_0.1-3      rstudioapi_0.14      mice_3.15.0          farver_2.1.1         listenv_0.9.0       
 [10] Deriv_4.1.3          MatrixModels_0.5-1   bit64_4.0.5          fansi_1.0.4          mvtnorm_1.1-3        xml2_1.3.3           R.methodsS3_1.8.2    codetools_0.2-19     splines_4.2.3       
 [19] doParallel_1.0.17    mnormt_2.1.1         knitr_1.42           Formula_1.2-5        nloptr_2.0.3         broom_1.0.4          cluster_2.1.4        R.oo_1.25.0          bigparallelr_0.3.2  
 [28] compiler_4.2.3       httr_1.4.5           backports_1.4.1      Matrix_1.5-4         fastmap_1.1.1        cli_3.6.1            admisc_0.31          htmltools_0.5.5      quantreg_5.95       
 [37] tools_4.2.3          gtable_0.3.3         glue_1.6.2           doRNG_1.8.6          Rcpp_1.0.10          cellranger_1.1.0     vctrs_0.6.2          gdata_2.18.0.1       svglite_2.1.1       
 [46] nlme_3.1-162         iterators_1.0.14     xfun_0.39            globals_0.16.2       lme4_1.1-32          rvest_1.0.3          timechange_0.2.0     lifecycle_1.0.3      dcurver_0.9.2       
 [55] rngtools_1.5.2       gtools_3.9.4         polspline_1.1.22     MASS_7.3-58.3        zoo_1.8-12           scales_1.2.1         vroom_1.6.1          flock_0.7            hms_1.1.3           
 [64] parallel_4.2.3       sandwich_3.0-2       SparseM_1.81         pbapply_1.7-0        gridExtra_2.3        rpart_4.1.19         stringi_1.7.12       checkmate_2.1.0      permute_0.9-7       
 [73] bigassertr_0.1.6     polycor_0.8-1        boot_1.3-28          rlang_1.1.0          pkgconfig_2.0.3      systemfonts_1.0.4    evaluate_0.20        labeling_0.4.2       htmlwidgets_1.6.2   
 [82] cowplot_1.1.1        bit_4.0.5            tidyselect_1.2.0     parallelly_1.35.0    bigsparser_0.6.1     R6_2.5.1             generics_0.1.3       multcomp_1.4-23      pillar_1.9.0        
 [91] foreign_0.8-82       withr_2.5.0          mgcv_1.8-42          survival_3.5-3       nnet_7.3-18          future.apply_1.10.0  crayon_1.5.2         utf8_1.2.3           tzdb_0.3.0          
[100] rmarkdown_2.21       grid_4.2.3           data.table_1.14.8    vegan_2.6-4          digest_0.6.31        webshot_0.5.4        GPArotation_2023.3-1 R.utils_2.12.2       munsell_0.5.0       
[109] viridisLite_0.4.1    kableExtra_1.3.4    
privefl commented 1 year ago

I think your issue is similar as https://github.com/privefl/bigsnpr/issues/83.

privefl commented 1 year ago

Any update on this?