xinhe-lab / ctwas

package for the causal TWAS project
https://xinhe-lab.github.io/ctwas/
MIT License
32 stars 12 forks source link

``ctwas_rss`` vignette error #9

Closed jokamoto97 closed 10 months ago

jokamoto97 commented 11 months ago

Hello,

I am trying to run thectwas_rss function example from the ctwas_rss_wkfl.Rmd vignette. I ran the code:

library(ctwas)

data(z_snp)
head(z_snp)

ld_pgenfs <- system.file("extdata/example_genotype_files", paste0("example_chr", 1:22, ".pgen"), package = "ctwas")
ld_pgenfs

ld_R_dir <- system.file("extdata/example_ld_R", package = "ctwas")
list.files(ld_R_dir)[1:10]

head(read.table(system.file("extdata/example_ld_R","example_chr1.R_snp.0_5.Rvar",  package = "ctwas"), header = T))

weight.fusion <- system.file("extdata/example_fusion_weights", "Tissue", package = "ctwas")

weight.predictdb <- system.file("extdata", "example_tissue.db", package = "ctwas") 

regionsfile <- system.file("extdata", "example_regions.bed", package = "ctwas")
head(read.table(regionsfile, header = T))

outputdir <- "~/temp"
res <- impute_expr_z(z_snp = z_snp,  weight = weight.fusion, ld_pgenfs = ld_pgenfs,
                           method = "lasso", outputdir = outputdir,
                           outname = "test_ss")
z_gene <- res$z_gene
ld_exprfs <- res$ld_exprfs

head(z_gene)
ld_exprfs

pars <- ctwas_rss(z_gene = z_gene, z_snp = z_snp, ld_exprfs = ld_exprfs, ld_pgenfs = ld_pgenfs, ld_regions_custom = regionsfile, thin = 0.9, max_snp_region = 20, outputdir = outputdir, outname = "test_ss", ncore = 1)

When I run the final line above, I get the error:

Error in { : task 1 failed - "'length = 4' in coercion to 'logical(1)'"
In addition: Warning messages:
1: In data.table::fread(exprvarf, header = T) :
  File '~/temp/test_ss_chr21.exprvar' has size 0. Returning a NULL data.table.
2: In data.table::fread(exprvarf, header = T) :
  File '~/temp/test_ss_chr22.exprvar' has size 0. Returning a NULL data.table.

I am trying to run ctwas and ctwas_rss on some of my own data and get a very similar error to above.

Do you know what may be causing this? My sessionInfo() is below:

R version 4.3.1 (2023-06-16)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3;  LAPACK version 3.9.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: America/New_York
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] ctwas_0.1.34

loaded via a namespace (and not attached):
 [1] doParallel_1.0.17 crayon_1.5.2      vctrs_0.6.3       cli_3.6.1        
 [5] rlang_1.1.1       generics_0.1.3    data.table_1.14.8 glue_1.6.2       
 [9] fansi_1.0.4       grid_4.3.1        tibble_3.2.1      foreach_1.5.2    
[13] lifecycle_1.0.3   compiler_4.3.1    dplyr_1.1.2       codetools_0.2-19 
[17] Rcpp_1.0.11       pkgconfig_2.0.3   pgenlibr_0.3.5    lattice_0.21-8   
[21] R6_2.5.1          tidyselect_1.2.0  utf8_1.2.3        parallel_4.3.1   
[25] pillar_1.9.0      magrittr_2.0.3    Matrix_1.6-0      tools_4.3.1      
[29] withr_2.5.0       iterators_1.0.14  logging_0.10-108 

Thanks in advance,

Jeff

sq-96 commented 10 months ago

This is an outdated vignette. Could you try this one: https://xinhe-lab.github.io/ctwas/articles/transition.html?

jokamoto97 commented 10 months ago

I tried the vignette at https://xinhe-lab.github.io/ctwas/articles/transition.html and continue to get a similar error as above when running the ctwas_rss function. My code and the resulting error are below:

library(ctwas)
library(gwasvcf)

dir.create("~/Integrative/project3/ctwas_example/gwas_summary_stats")

system("wget https://gwas.mrcieu.ac.uk/files/ukb-d-30780_irnt/ukb-d-30780_irnt.vcf.gz -P ~/Integrative/project3/ctwas_example/gwas_summary_stats")
R.utils::gunzip("~/Integrative/project3/ctwas_example/gwas_summary_stats/ukb-d-30780_irnt.vcf.gz")

#read the data
z_snp <- VariantAnnotation::readVcf("~/Integrative/project3/ctwas_example/gwas_summary_stats/ukb-d-30780_irnt.vcf")
z_snp <- as.data.frame(gwasvcf::vcf_to_tibble(z_snp))

#compute the z-scores
z_snp$Z <- z_snp$ES/z_snp$SE

#collect sample size (most frequent sample size for all variants)
gwas_n <- as.numeric(names(sort(table(z_snp$SS),decreasing=TRUE)[1]))

#subset the columns and format the column names
z_snp <- z_snp[,c("rsid", "ALT", "REF", "Z")]
colnames(z_snp) <- c("id", "A1", "A2", "z")

#drop multiallelic variants (id not unique)
z_snp <- z_snp[!(z_snp$id %in% z_snp$id[duplicated(z_snp$id)]),]

#save the formatted z-scores and GWAS sample size
saveRDS(z_snp, file="~/Integrative/project3/ctwas_example/gwas_summary_stats/ukb-d-30780_irnt.RDS")
saveRDS(gwas_n, file="~/Integrative/project3/ctwas_example/gwas_summary_stats/gwas_n.RDS")

z_snp <- readRDS("~/Integrative/project3/ctwas_example/gwas_summary_stats/ukb-d-30780_irnt.RDS")
gwas_n <- readRDS("~/Integrative/project3/ctwas_example/gwas_summary_stats/gwas_n.RDS")

head(z_snp)

gwas_n

weight_fusion <- system.file("extdata/example_fusion_weights", "Tissue", package = "ctwas")

list.files(dirname(weight_fusion))

list.files(weight_fusion)

rm(weight_fusion)

weight_subset <- system.file("extdata/weights_nolnc", "mashr_Liver_nolnc_subset.db", package = "ctwas")

regions <- system.file("extdata/ldetect", "EUR.b38.bed", package = "ctwas")

regions_df <- read.table(regions, header = T)

locus_chr <- "chr16"
locus_start <- 71020125

regions_df[which(regions_df$chr==locus_chr & regions_df$start==locus_start)+c(-2:2),]

rm(regions)

regions_df <- regions_df[regions_df$chr==locus_chr & regions_df$start==locus_start,]

dir.create("~/Integrative/project3/ctwas_example/regions", showWarnings=F)
regions_file <- "~/Integrative/project3/ctwas_example/regions/regions_subset.bed"
write.table(regions_df, file=regions_file, row.names=F, col.names=T, sep="\t", quote = F)

rm(regions_df)

ld_pgenfs <- system.file("extdata/example_genotype_files", paste0("example_chr", 1:22, ".pgen"), package = "ctwas")
head(ld_pgenfs)

rm(ld_pgenfs)

ld_R_dir <- system.file("extdata/ld_matrices", package = "ctwas")
list.files(ld_R_dir)

R_snp <- readRDS(system.file("extdata/ld_matrices", "example_locus_chr16.R_snp.71020125_72901251.RDS", package = "ctwas"))
str(R_snp)

R_snp_info <- read.table(system.file("extdata/ld_matrices", "example_locus_chr16.R_snp.71020125_72901251.Rvar", package = "ctwas"), header=T)
head(R_snp_info)

rm(R_snp)

system.file("extdata/scripts", "convert_geno_to_LDR_chr.R", package = "ctwas")

system.file("extdata/scripts", "convert_geno_to_LDR_chr_b37.R", package = "ctwas")

z_snp_subset <- z_snp[z_snp$id %in% R_snp_info$id,]

rm(R_snp_info)

saveRDS(z_snp_subset, file="~/Integrative/project3/ctwas_example/gwas_summary_stats/ukb-d-30780_irnt_subset.RDS")

z_snp_subset <- readRDS(system.file("extdata/summary_stats", "ukb-d-30780_irnt_subset.RDS", package = "ctwas"))

outputdir <- "~/Integrative/project3/ctwas_example/results/single_locus/"
outname <- "example_locus"

res <- impute_expr_z(z_snp = z_snp_subset,
                     weight = weight_subset,
                     ld_R_dir = ld_R_dir,
                     outputdir = outputdir,
                     outname = outname,
                     harmonize_z = T,
                     harmonize_wgt = T,
                     strand_ambig_action_z = "none",
                     recover_strand_ambig_wgt = T)

str(res)

z_gene_subset <- res$z_gene
ld_exprfs <- res$ld_exprfs
z_snp_subset <- res$z_snp

save(z_gene_subset, file = paste0(outputdir, outname, "_z_gene.Rd"))
save(ld_exprfs, file = paste0(outputdir, outname, "_ld_exprfs.Rd"))
save(z_snp_subset, file = paste0(outputdir, outname, "_z_snp.Rd"))

list.files(outputdir, pattern="chr16")

#the estimated prior inclusion probabilities for genes and variants from the paper
group_prior <- c(0.0107220302, 0.0001715896)

#the estimated effect sizes for genes and variants from the paper
group_prior_var <- c(41.327666, 9.977841)

# run ctwas_rss
ctwas_rss(z_gene = z_gene_subset,
          z_snp = z_snp_subset,
          ld_exprfs = ld_exprfs,
          ld_R_dir = ld_R_dir,
          ld_regions_custom = regions_file,
          outputdir = outputdir,
          outname = outname,
          estimate_group_prior = F,
          estimate_group_prior_var = F,
          group_prior = group_prior,
          group_prior_var = group_prior_var)
Error in { : 
  task 1 failed - "'length = 10975' in coercion to 'logical(1)'"
In addition: There were 21 warnings (use warnings() to see them)

Session info:

R version 4.3.1 (2023-06-16)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3;  LAPACK version 3.9.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: America/New_York
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] gwasvcf_0.1.2 ctwas_0.1.35 

loaded via a namespace (and not attached):
 [1] tidyselect_1.2.0            dplyr_1.1.3                
 [3] blob_1.2.4                  pgenlibr_0.3.5             
 [5] filelock_1.0.2              R.utils_2.12.2             
 [7] Biostrings_2.68.1           bitops_1.0-7               
 [9] fastmap_1.1.1               RCurl_1.98-1.12            
[11] BiocFileCache_2.8.0         VariantAnnotation_1.46.0   
[13] GenomicAlignments_1.36.0    XML_3.99-0.14              
[15] digest_0.6.33               lifecycle_1.0.3            
[17] processx_3.8.2              KEGGREST_1.40.0            
[19] RSQLite_2.3.1               magrittr_2.0.3             
[21] compiler_4.3.1              rlang_1.1.1                
[23] progress_1.2.2              tools_4.3.1                
[25] yaml_2.3.7                  utf8_1.2.3                 
[27] data.table_1.14.8           rtracklayer_1.60.0         
[29] prettyunits_1.1.1           S4Arrays_1.0.5             
[31] bit_4.0.5                   pkgbuild_1.4.2             
[33] curl_5.0.1                  DelayedArray_0.26.7        
[35] xml2_1.3.5                  abind_1.4-5                
[37] BiocParallel_1.34.2         BiocGenerics_0.46.0        
[39] desc_1.4.2                  R.oo_1.25.0                
[41] grid_4.3.1                  stats4_4.3.1               
[43] fansi_1.0.4                 iterators_1.0.14           
[45] logging_0.10-108            biomaRt_2.56.1             
[47] SummarizedExperiment_1.30.2 cli_3.6.1                  
[49] crayon_1.5.2                generics_0.1.3             
[51] remotes_2.4.2.1             tzdb_0.4.0                 
[53] rjson_0.2.21                httr_1.4.6                 
[55] DBI_1.1.3                   cachem_1.0.8               
[57] stringr_1.5.0               zlibbioc_1.46.0            
[59] parallel_4.3.1              AnnotationDbi_1.62.2       
[61] XVector_0.40.0              restfulr_0.0.15            
[63] matrixStats_1.0.0           vctrs_0.6.3                
[65] Matrix_1.6-0                callr_3.7.3                
[67] IRanges_2.34.1              hms_1.1.3                  
[69] S4Vectors_0.38.1            bit64_4.0.5                
[71] GenomicFeatures_1.52.1      foreach_1.5.2              
[73] glue_1.6.2                  codetools_0.2-19           
[75] ps_1.7.5                    stringi_1.7.12             
[77] GenomeInfoDb_1.36.1         GenomicRanges_1.52.0       
[79] BiocIO_1.10.0               tibble_3.2.1               
[81] pillar_1.9.0                rappdirs_0.3.3             
[83] BSgenome_1.68.0             GenomeInfoDbData_1.2.10    
[85] R6_2.5.1                    dbplyr_2.3.3               
[87] doParallel_1.0.17           rprojroot_2.0.3            
[89] lattice_0.21-8              Biobase_2.60.0             
[91] readr_2.1.4                 R.methodsS3_1.8.2          
[93] png_0.1-8                   Rsamtools_2.16.0           
[95] memoise_2.0.1               Rcpp_1.0.11                
[97] MatrixGenerics_1.12.2       pkgconfig_2.0.3          
sq-96 commented 10 months ago

Hi Jeff, We haven't tested on R/4.3.1 and suspect the error might be due to R version. Could you try to run it with R/4.1.0?

jokamoto97 commented 10 months ago

Yes, I was able to successfully run it with R/4.1.0.

Thanks for the help!