Open AMCalejandro opened 2 years ago
This step is being handled by echodata:::standardize_maf:
https://github.com/RajLabMSSM/echodata/blob/main/R/standardize_maf.R
I think it's failing to impute MAF because of some flaw in the logic. I've tried adjusting this so that it will trigger imputing MAF from your 'FREQ' col. After reinstalling, let me know if this works for you.
devtools::install_github("RajLabMSSM/echolocatoR", dependencies = TRUE)
Any luck with this @AMCalejandro ?
Hi,
I just reran this using the same script I used when I opened this issue.
It is failing for me at a step in the query.
GWAS header: This is what I am passing to finemap_loci as newSS_name_colmap
> head(data_colmaps)
SNP CHR POS A1 A2 FREQ BETA SE P N
1: rs58276399 1 731718 t c 0.8837 -0.1775 0.1583 0.2621 1297
2: rs142557973 1 731718 t c 0.8837 -0.1775 0.1583 0.2621 1297
3: rs141242758 1 734349 t c 0.8843 -0.1577 0.1593 0.3223 1297
4: rs2073813 1 753541 a g 0.1257 0.0721 0.1177 0.5399 2687
5: rs61768174 1 766007 a c 0.9005 -0.2559 0.1642 0.1190 1297
6: rs60320384 1 769223 c g 0.8749 -0.0772 0.1178 0.5124 2687
columnsnames: This is what I get after running construct_colmap
> columnsnames
$munged
[1] FALSE
$CHR
[1] "CHR"
$POS
[1] "POS"
$SNP
[1] "SNP"
$P
[1] "P"
$Effect
[1] "BETA"
$StdErr
[1] "SE"
$tstat
[1] "tstat"
$Locus
[1] "Locus"
$Freq
[1] "FREQ"
$MAF
[1] "calculate"
$A1
[1] "A1"
$A2
[1] "A2"
$Gene
[1] "Gene"
$N_cases
[1] "N_cases"
$N_controls
[1] "N_controls"
$proportion_cases
[1] "calculate"
$N
[1] "N"
$verbose
[1] TRUE
topSNPs
> topSNPs
# A tibble: 3 × 7
Locus Gene CHR POS SNP P BETA
<chr> <chr> <int> <dbl> <chr> <dbl> <dbl>
1 RP11-240A16.1 RP11-240A16.1 4 32435284 rs189093213 0.00000000167 1.12
2 XYLT1 XYLT1 16 17044975 rs180924818 0.00000000626 -1.14
3 LRP8 LRP8 1 53778300 rs72673189 0.0000000153 1.02
finemap_loci(# GENERAL ARGUMENTS
topSNPs = topSNPs,
results_dir = fullRS_path,
loci = topSNPs$Locus,
dataset_name = "LID_COX",
dataset_type = "GWAS",
force_new_subset = TRUE,
force_new_LD = FALSE,
force_new_finemap = TRUE,
remove_tmps = FALSE,
finemap_methods = c("ABF","FINEMAP","SUSIE"),
# Munge full sumstats first
munged = FALSE,
colmap = columnsnames,
# SUMMARY STATS ARGUMENTS
fullSS_path = newSS_name_colmap,
fullSS_genome_build = "hg19",
query_by ="tabix",
compute_n = 3500,
bp_distance = 10000,#500000*2,
min_MAF = 0.001,
trim_gene_limits = FALSE,
case_control = FALSE,
# FINE-MAPPING ARGUMENTS
## General
n_causal = 5,
credset_thresh = .95,
consensus_thresh = 2,
# LD ARGUMENTS
LD_reference = "1KGphase3",#"UKB",
superpopulation = "EUR",
download_method = "axel",
LD_genome_build = "hg19",
leadSNP_LD_block = FALSE,
#### PLotting args ####
plot_types = c("simple"),
show_plot = TRUE,
zoom = "1x",
tx_biotypes = NULL,
nott_epigenome = FALSE,
nott_show_placseq = FALSE,
nott_binwidth = 200,
nott_bigwig_dir = NULL,
xgr_libnames = NULL,
roadmap = FALSE,
roadmap_query = NULL,
#### General args ####
seed = 2022,
nThread = 20,
verbose = TRUE
)
[1] "+ Assigning Gene and Locus independently."
Standardising column headers.
First line of summary statistics file:
Locus Gene CHR POS SNP P BETA
Returning unmapped column names without making them uppercase.
+ Mapping colnames from MungeSumstats ==> echolocatoR
┌─────────────────────────────────────────────────┐
│ │
│ )))> 🦇 RP11-240A16.1 [locus 1 / 3] 🦇 <((( │
│ │
└─────────────────────────────────────────────────┘
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
── Step 1 ▶▶▶ Query 🔎 ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
+ Query Method: tabix
Constructing GRanges query using min/max ranges within a single chromosome.
query_dat is already a GRanges object. Returning directly.
========= echotabix::convert =========
Converting full summary stats file to tabix format for fast querying.
Inferred format: 'table'
Explicit format: 'table'
Inferring comment_char from tabular header: 'SNP'
Determining chrom type from file header.
Chromosome format: 1
Detecting column delimiter.
Identified column separator: \t
Sorting rows by coordinates via bash.
Searching for header row with grep.
( grep ^'SNP' .../QC_SNPs_COLMAP.txt; grep
-v ^'SNP' .../QC_SNPs_COLMAP.txt | sort
-k2,2n
-k3,3n ) > .../file11e24b7d23ea38_sorted.tsv
Constructing outputs
Using existing bgzipped file: /home/rstudio/echolocatoR/echolocatoR_LID/QC_SNPs_COLMAP.txt.bgz
Set force_new=TRUE to override this.
start_col not found in file.Locus RP11-240A16.1 complete in: 0.24 min
┌─────────────────────────────────────────┐
│ │
│ )))> 🦇 XYLT1 [locus 2 / 3] 🦇 <((( │
│ │
└─────────────────────────────────────────┘
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
── Step 1 ▶▶▶ Query 🔎 ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
+ Query Method: tabix
Constructing GRanges query using min/max ranges within a single chromosome.
query_dat is already a GRanges object. Returning directly.
========= echotabix::convert =========
Converting full summary stats file to tabix format for fast querying.
Inferred format: 'table'
Explicit format: 'table'
Inferring comment_char from tabular header: 'SNP'
Determining chrom type from file header.
Chromosome format: 1
Detecting column delimiter.
Identified column separator: \t
Sorting rows by coordinates via bash.
Searching for header row with grep.
( grep ^'SNP' .../QC_SNPs_COLMAP.txt; grep
-v ^'SNP' .../QC_SNPs_COLMAP.txt | sort
-k2,2n
-k3,3n ) > .../file11e24b5d9196c9_sorted.tsv
Constructing outputs
Using existing bgzipped file: /home/rstudio/echolocatoR/echolocatoR_LID/QC_SNPs_COLMAP.txt.bgz
Set force_new=TRUE to override this.
start_col not found in file.Locus XYLT1 complete in: 0.24 min
┌────────────────────────────────────────┐
│ │
│ )))> 🦇 LRP8 [locus 3 / 3] 🦇 <((( │
│ │
└────────────────────────────────────────┘
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
── Step 1 ▶▶▶ Query 🔎 ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
+ Query Method: tabix
Constructing GRanges query using min/max ranges within a single chromosome.
query_dat is already a GRanges object. Returning directly.
========= echotabix::convert =========
Converting full summary stats file to tabix format for fast querying.
Inferred format: 'table'
Explicit format: 'table'
Inferring comment_char from tabular header: 'SNP'
Determining chrom type from file header.
Chromosome format: 1
Detecting column delimiter.
Identified column separator: \t
Sorting rows by coordinates via bash.
Searching for header row with grep.
( grep ^'SNP' .../QC_SNPs_COLMAP.txt; grep
-v ^'SNP' .../QC_SNPs_COLMAP.txt | sort
-k2,2n
-k3,3n ) > .../file11e24b6cdbde69_sorted.tsv
Constructing outputs
Using existing bgzipped file: /home/rstudio/echolocatoR/echolocatoR_LID/QC_SNPs_COLMAP.txt.bgz
Set force_new=TRUE to override this.
start_col not found in file.Locus LRP8 complete in: 0.24 min
I just got this running
Note that MAF is not inferred even though I am passing the Freq column and setting MAF to "calculate".
Code
Output
Session Info