Closed DM087 closed 3 years ago
I have also updated the script https://github.com/choishingwan/PRS-Tutorial/blob/master/docs/ldpred.md
Could you please check if the new script works as expected? If it does, then I will render the new website. Thank you
Sam
Hi, could you please try and remove [chr.idx] from beta_grid and see if it works? I don't have time to verify it but based on my pipeline, that might be the proper solution.
e.g.
tmp <- big_prodMat(genotype, beta_auto, ind.row = ind.test, ind.col = ind.chr) pred_scaled <- apply(tmp, 2, sd) final_beta_auto <- rowMeans(tmp[, abs(pred_scaled - median(pred_scaled)) < 3 * mad(pred_scaled)]) tmp <- big_prodVec(genotype, final_beta_auto, ind.row = ind.test, ind.col = ind.chr) if(is.null(pred_auto)){ pred_auto <- tmp }else{ pred_auto <- pred_auto + tmp }
Hi,
I tried the script https://github.com/choishingwan/PRS-Tutorial/blob/master/docs/ldpred.md and I get the following error for all of the models
Error: Error: Incompatibility between dimensions
Thank you! Dev
Ok, I will need to go back and check. Though I don’t have time for that at the moment…
For now you can maybe use the following code(Replace all ${} items with appropriate name, and change the labeling / column names if you are not using the tutorial). This is what I used to get the LDpred expected results so it should work.
library(data.table)
library(magrittr)
library(bigsnpr)
options(bigstatsr.check.parallel.blas = FALSE)
options(default.nproc.blas = NULL)
phenotype <- fread("${pheno}")
covariate <- fread("${cov}")
pheno <- merge(phenotype, covariate)
hapMap <- readRDS("${hap}")
sumstats <- bigreadr::fread2("${gwas}")
names(sumstats) <-
c("chr",
"pos",
"rsid",
"a1",
"a0",
"n_eff",
"beta_se",
"p",
"OR",
"INFO",
"MAF")
sumstats$beta <- log(sumstats$OR)
sumstats <- sumstats[sumstats$rsid %in% hapMap$rsid,]
NCORES <- 24
tmp <- tempfile(tmpdir = "tmp-data")
on.exit(file.remove(paste0(tmp, ".sbk")), add = TRUE)
corr <- NULL
ld <- NULL
fam.order <- NULL
info_snp <- NULL
for (chr in 1:22) {
snp_readBed(paste0(chr,".bed"))
obj.bigSNP <- snp_attach(paste0(chr,".rds"))
map <- obj.bigSNP$map[-3]
names(map) <- c("chr", "rsid", "pos", "a1", "a0")
tmp_snp <- snp_match(sumstats[sumstats$chr==chr,], map)
info_snp <- rbind(info_snp, tmp_snp)
genotype <- obj.bigSNP$genotypes
CHR <- map$chr
POS <- map$pos
POS2 <- snp_asGeneticPos(CHR, POS, dir = ".")
ind.chr <- which(tmp_snp$chr == chr)
ind.chr2 <- tmp_snp$_NUM_ID_
[ind.chr]
corr0 <- snp_cor(
genotype,
ind.col = ind.chr2,
ncores = NCORES,
infos.pos = POS2[ind.chr2],
size = 3 / 1000
)
if (chr == 1) {
ld <- Matrix::colSums(corr0^2)
corr <- as_SFBM(corr0, tmp)
} else {
ld <- c(ld, Matrix::colSums(corr0^2))
corr$add_columns(corr0, nrow(corr))
}
# We assume the fam order is the same across different chromosomes
if(is.null(fam.order)){
fam.order <- as.data.table(obj.bigSNP$fam)
}
}
# Rename fam order
setnames(fam.order,
c("family.ID", "sample.ID"),
c("FID", "IID"))
df_beta <- info_snp[,c("beta", "beta_se", "n_eff", "_NUMID")]
ldsc <- snp_ldsc( ld,
length(ld),
chi2 = (df_beta$beta / df_beta$beta_se)^2,
sample_size = df_beta$n_eff,
blocks = NULL)
h2_est <- ldsc[["h2"]]
y <- pheno[fam.order, on = c("FID", "IID")]
null.model <- paste("PC", 1:6, sep = "", collapse = "+") %>%
paste0("Height~Sex+", .) %>%
as.formula %>%
lm(., data = y) %>%
summary
null.r2 <- null.model$r.squared
ind.test <- 1:nrow(genotype)
if("${type}"=="inf"){
beta_inf <- snp_ldpred2_inf(corr, df_beta, h2 = h2_est)
pred_inf <- NULL
for(chr in 1:22){
obj.bigSNP <- snp_attach(paste0(chr,".rds"))
genotype <- obj.bigSNP$genotypes
chr.idx <- which(info_snp$chr == chr)
ind.chr <- info_snp$_NUM_ID_
[chr.idx]
tmp <- big_prodVec(genotype,
beta_inf[chr.idx],
ind.row = ind.test,
ind.col = ind.chr)
if(is.null(pred_inf)){
pred_inf <- tmp
}else{
pred_inf <- pred_inf + tmp
}
}
reg.formula <- paste("PC", 1:6, sep = "", collapse = "+") %>%
paste0("Height~PRS+Sex+", .) %>%
as.formula
reg.dat <- y
reg.dat$PRS <- pred_inf
inf.model <- lm(reg.formula, dat=reg.dat) %>%
summary
(result <- data.table(
infinitesimal = inf.model$r.squared - null.r2,
null = null.r2
))
}else if("${type}"=="grid"){
p_seq <- signif(seq_log(1e-4, 1, length.out = 17), 2)
h2_seq <- round(h2_est * c(0.7, 1, 1.4), 4)
grid.param <-
expand.grid(p = p_seq,
h2 = h2_seq,
sparse = c(FALSE, TRUE))
beta_grid <-
snp_ldpred2_grid(corr, df_beta, grid.param, ncores = NCORES)
pred_grid <- NULL
for(chr in 1:22){
obj.bigSNP <- snp_attach(paste0(chr,".rds"))
genotype <- obj.bigSNP$genotypes
chr.idx <- which(info_snp$chr == chr)
ind.chr <- info_snp$_NUM_ID_
[chr.idx]
tmp <- big_prodMat( genotype,
beta_grid[chr.idx,],
ind.row = ind.test,
ind.col = ind.chr)
if(is.null(pred_grid)){
pred_grid <- tmp
}else{
pred_grid <- pred_grid + tmp
}
}
reg.formula <- paste("PC", 1:6, sep = "", collapse = "+") %>%
paste0("Height~PRS+Sex+", .) %>%
as.formula
reg.dat <- y
max.r2 <- 0
for(i in 1:ncol(pred_grid)){
reg.dat$PRS <- pred_grid[,i]
grid.model <- lm(reg.formula, dat=reg.dat) %>%
summary
if(max.r2 < grid.model$r.squared){
max.r2 <- grid.model$r.squared
}
}
(result <- data.table(
grid = max.r2 - null.r2,
null = null.r2
))
}else{
multi_auto <- snp_ldpred2_auto(
corr,
df_beta,
h2_init = h2_est,
vec_p_init = seq_log(1e-4, 0.9, length.out = NCORES),
ncores = NCORES
)
beta_auto <- sapply(multi_auto, function(auto)
auto$beta_est)
pred_auto <- NULL
for(chr in 1:22){
obj.bigSNP <- snp_attach(paste0(chr,".rds"))
genotype <- obj.bigSNP$genotypes
chr.idx <- which(info_snp$chr == chr)
ind.chr <- info_snp$_NUM_ID_
[chr.idx]
tmp <-
big_prodMat(genotype,
beta_auto[chr.idx,],
ind.row = ind.test,
ind.col = ind.chr)
pred_scaled <- apply(tmp, 2, sd)
final_beta_auto <-
rowMeans(beta_auto[chr.idx,
abs(pred_scaled -
median(pred_scaled)) <
3 * mad(pred_scaled)])
tmp <-
big_prodVec(genotype,
final_beta_auto,
ind.row = ind.test,
ind.col = ind.chr)
if(is.null(pred_auto)){
pred_auto <- tmp
}else{
pred_auto <- pred_auto + tmp
}
}
reg.formula <- paste("PC", 1:6, sep = "", collapse = "+") %>%
paste0("Height~PRS+Sex+", .) %>%
as.formula
reg.dat <- y
reg.dat$PRS <- pred_auto
auto.model <- lm(reg.formula, dat=reg.dat) %>%
summary
(result <- data.table(
auto = auto.model$r.squared - null.r2,
null = null.r2
))
}
From: DM087 notifications@github.com Reply-To: choishingwan/PRS-Tutorial reply@reply.github.com Date: Tuesday, December 1, 2020 at 4:27 PM To: choishingwan/PRS-Tutorial PRS-Tutorial@noreply.github.com Cc: Shing Wan Choi choishingwan@gmail.com, Comment comment@noreply.github.com Subject: Re: [choishingwan/PRS-Tutorial] LDpred-2 tutorial: error in "Obtain model PRS" (#19)
Hi,
I tried the script https://github.com/choishingwan/PRS-Tutorial/blob/master/docs/ldpred.md and I get the following error for all of the models
Error: Error: Incompatibility between dimensions
Thank you! Dev
— You are receiving this because you commented. Reply to this email directly, view it on GitHub, or unsubscribe.
Hi,
Thank you for the code. I wanted to try it on the example dataset that you have provided. I have a couple of questions here
"${type}"=="grid"
is this the model name? what are we defining here?
I wanted to add the adjusted beta from say inf model (beta_inf) to their corresponding SNP IDs. Do you think the below code would do it
adjusted_betas = cbind(data.table(info_snp)[,.(chr,pos,a0,a1,rsid.ss,beta_se,p,OR,INFO,MAF,beta)],beta_inf)
Thank you again!
Dev
Type can be grid, inf or auto, which corresponds to the three possible LDpred model.
I have never tried, but I’d imagine the index of the beta follow their input.
Sam
From: DM087 notifications@github.com Reply-To: choishingwan/PRS-Tutorial reply@reply.github.com Date: Wednesday, December 2, 2020 at 12:49 PM To: choishingwan/PRS-Tutorial PRS-Tutorial@noreply.github.com Cc: Shing Wan Choi choishingwan@gmail.com, Comment comment@noreply.github.com Subject: Re: [choishingwan/PRS-Tutorial] LDpred-2 tutorial: error in "Obtain model PRS" (#19)
Hi,
Thank you for the code. I wanted to try it on the example dataset that you have provided. I have a couple of questions here "${type}"=="grid" is this the model name? what are we defining here? I wanted to add the adjusted beta from say inf model (beta_inf) to their corresponding SNP IDs. Do you think the below code would do it adjusted_betas = cbind(data.table(info_snp)[,.(chr,pos,a0,a1,rsid.ss,beta_se,p,OR,INFO,MAF,beta)],beta_inf)
Thank you again!
Dev
— You are receiving this because you commented. Reply to this email directly, view it on GitHub, or unsubscribe.
Thank you.
I tried "${type}"=="grid" as "grid"=="grid" and "inf"=="inf" (on chromosome separated files) using the above code but it creates PRS for auto model only. Am I missing something here?
Dev
Try to replace all ${type} with type
Then do type <- “grid” at the beginning. I might have used the ${type} to name the results.
Sam
From: DM087 notifications@github.com Reply-To: choishingwan/PRS-Tutorial reply@reply.github.com Date: Wednesday, December 2, 2020 at 2:42 PM To: choishingwan/PRS-Tutorial PRS-Tutorial@noreply.github.com Cc: Shing Wan Choi choishingwan@gmail.com, Comment comment@noreply.github.com Subject: Re: [choishingwan/PRS-Tutorial] LDpred-2 tutorial: error in "Obtain model PRS" (#19)
Thank you.
I tried "${type}"=="grid" as "grid"=="grid" and "inf"=="inf" (on chromosome separated files) using the above code but it creates PRS for auto model only. Am I missing something here?
Dev
— You are receiving this because you commented. Reply to this email directly, view it on GitHub, or unsubscribe.
Thank you!
Dev
How do you resolve this error task 13 failed - "cannot open URL 'https://github.com/joepickrell/1000-genomes-genetic-maps/raw/master/interpolated_OMNI/chr23.OMNI.interpolated_genetic_map.gz'"
I got this after running this code POS2 <- snp_asGeneticPos(CHR, POS, dir = ".")
There shouldn't be chr 23 ( don't think LDpred supports the sex chromosomes)
Hi and thanks for the quick response. I agree that they should be no chromosome 23. But this is the error I am getting is there a way to get around this. As that error is making me not progress with the tutorial .It pop us after running
POS2 <- snp_asGeneticPos(CHR, POS, dir = ".")
POS2 is not created and I am not able to do the downstream analysis
Just make sure you don’t have CHR bigger than 22 and that should in theory be fine
From: tinashedoc notifications@github.com Reply-To: choishingwan/PRS-Tutorial reply@reply.github.com Date: Tuesday, January 12, 2021 at 12:05 PM To: choishingwan/PRS-Tutorial PRS-Tutorial@noreply.github.com Cc: Shing Wan Choi choishingwan@gmail.com, Comment comment@noreply.github.com Subject: Re: [choishingwan/PRS-Tutorial] LDpred-2 tutorial: error in "Obtain model PRS" (#19)
Hi and thanks for the quick response. I agree that they should be no chromosome 23. But this is the error I am getting is there a way to get around this. As that error is making me not progress with the tutorial .It pop us after running
POS2 <- snp_asGeneticPos(CHR, POS, dir = ".")
POS2 is not created and I am not able to do the downstream analysis
— You are receiving this because you commented. Reply to this email directly, view it on GitHub, or unsubscribe.
Thanks for the advice , the code ran well when I removed the chr 23.
Dear Sam,
I have been following your tutorial on LDpred-2 (Using chromosome separated bed files) and finding it very helpful in calculating PRS. Thank you.
When I run step 7 (obtain model PRS) using chromosome separated bed files, the code in
big_prodMat
throws an error _Error in seqlen(nrow(x)) : argument must be coercible to non-negative integer. I get this error for both Grid and Auto models.Do you think object
beta_grid[chr.idx]
inbig_prodMat
rowMeans
should beas.matrix(beta_grid[chr.idx]
Best, Dev