Open CamilleGodi opened 1 month ago
I think I found a temporary fix, for those it may interest. The last two lines are used to change the function in the SKAT package by the "fixed" one when my script is called/used.
### FIX SKAT FUNCTION : ALLOW TO GET VAR NAME WHEN TEST ON SINGLE VARIANT
SKAT_MAIN_Check_Z<-function(Z, n, id_include, SetID, weights, weights.beta, impute.method, is_check_genotype
, is_dosage, missing_cutoff, max_maf, estimate_MAF=1, Is.chrX=FALSE, SexVar=NULL, Is.ChrY=FALSE){
#############################################
# Check parameters
varNames <- colnames(Z) ### ADDED
# changed by SLEE 12/23/2019
if(!Check_Class(Z, c("matrix", "dgCMatrix"))){
stop("Z is not a matrix")
}
if (nrow(Z)!=n) stop("Dimensions of y and Z do not match")
if(is_dosage ==TRUE){
impute.method="fixed"
}
if(is.null(colnames(Z))){
colnames(Z)<-sprintf("VAR%d", 1:ncol(Z))
}
#####################################################
# Check Z
if(!is_check_genotype && !is_dosage){
Z.test<-Z[id_include,]
# changed by SLEE 12/23/2019
if(!Check_Class(Z.test, c("matrix", "dgCMatrix"))){
Z.test<-as.matrix(Z.test)
}
return(list(Z.test=Z.test,weights=weights, MAF=rep(0, ncol(Z)), return=0) )
}
if(estimate_MAF==2){
Z<-cbind(Z[id_include,])
id_include<-1:length(id_include)
}
##############################################
# Check Missing
IDX_MISS<-union(which(is.na(Z)),which(Z == 9))
if(length(IDX_MISS) > 0){
Z[IDX_MISS]<-NA
}
###################################################
# Check missing rates and exclude any SNPs with missing rate > missing_cutoff
# Also exclude non-polymorphic SNPs
m = ncol(Z)
ID_INCLUDE_SNP<-NULL
MAF_toCutoff<-SKAT_Get_MAF(Z, id_include=NULL, Is.chrX=Is.chrX, SexVar=SexVar)
# Changed by SLEE, 07/21/2017
for(i in 1:m){
missing.ratio<-length(which(is.na(Z[,i])))/n
sd1<-sd(Z[,i], na.rm=TRUE)
if(missing.ratio < missing_cutoff && sd1 > 0){
if(MAF_toCutoff[i] < max_maf){
ID_INCLUDE_SNP<-c(ID_INCLUDE_SNP,i)
}
}
}
if(length(ID_INCLUDE_SNP) == 0){
if(is.null(SetID)){
msg<-sprintf("ALL SNPs have either high missing rates or no-variation. P-value=1")
} else {
msg<-sprintf("In %s, ALL SNPs have either high missing rates or no-variation. P-value=1",SetID )
}
warning(msg,call.=FALSE)
re<-list(p.value = 1, p.value.resampling =NA, Test.Type = NA, Q = NA, param=list(n.marker=0, n.marker.test=0), return=1 )
return(re)
} else if(m - length(ID_INCLUDE_SNP) > 0 ){
if(is.null(SetID)){
msg<-sprintf("%d SNPs with either high missing rates or no-variation are excluded!", m - length(ID_INCLUDE_SNP))
} else {
msg<-sprintf("In %s, %d SNPs with either high missing rates or no-variation are excluded!",SetID, m - length(ID_INCLUDE_SNP) )
}
warning(msg,call.=FALSE)
Z<-as.matrix(Z[, ID_INCLUDE_SNP])
}
##################################################################
# doing imputation
MAF_Org<-SKAT_Get_MAF(Z, id_include=NULL, Is.chrX=Is.chrX, SexVar=SexVar)
Z<-SKAT_MAIN_Check_Z_Impute(Z, id_include,impute.method, SetID, Is.chrX, SexVar)
MAF<-SKAT_Get_MAF(Z, id_include=NULL, Is.chrX=Is.chrX, SexVar=SexVar)
MAF1<-SKAT_Get_MAF(Z, id_include=id_include, Is.chrX=Is.chrX, SexVar=SexVar)
###########################################
# Check non-polymorphic
if(length(which(MAF1 > 0)) == 0){
if(is.null(SetID)){
msg<-sprintf("No polymorphic SNP. P-value = 1" )
} else {
msg<-sprintf("In %s, No polymorphic SNP. P-value = 1",SetID )
}
warning(msg,call.=FALSE)
re<-list(p.value = 1, p.value.resampling =NA, Test.Type = NA, Q = NA, param=list(n.marker=0, n.marker.test=0), return=1 )
return(re)
}
##########################################
# Get Weights
if(is.null(weights)){
weights<-Beta.Weights(MAF,weights.beta)
} else {
weights = weights[ID_INCLUDE_SNP]
}
###########################################
# Check missing of y and X
if(n - length(id_include) > 0){
id_Z<-which(MAF1 > 0)
if(length(id_Z) == 0){
if(is.null(SetID)){
msg<-sprintf("No polymorphic SNP. P-value = 1" )
} else {
msg<-sprintf("In %s, No polymorphic SNP. P-value = 1",SetID )
}
warning(msg,call.=FALSE)
re<-list(p.value = 1, p.value.resampling =NA, Test.Type = NA, Q = NA, param=list(n.marker=0, n.marker.test=0), return=1 )
} else if (length(id_Z) == 1){
Z<-cbind(Z[,id_Z])
} else {
Z<-Z[,id_Z]
}
if(!is.null(weights)){
weights<-weights[id_Z]
}
}
if( dim(Z)[2] == 1){
if(is.null(SetID)){
msg<-sprintf("Only one SNP in the SNP set!" )
} else {
msg<-sprintf("In %s, Only one SNP in the SNP set!"
,SetID )
}
# Suppress this warning
#warning(msg,call.=FALSE)
Z.test<-as.matrix(Z[id_include,])
} else {
Z.test<-Z[id_include,]
}
### ADDED
if ( ncol(Z.test) == 1 ) {
colnames(Z.test) <- varNames[ID_INCLUDE_SNP] # FIX VARIANT NAME !
}
### END ADDITION
return(list(Z.test=Z.test,weights=weights, MAF=MAF_Org, id_include.test=id_include, return=0) )
}
environment(SKAT_MAIN_Check_Z) <- asNamespace('SKAT')
assignInNamespace("SKAT_MAIN_Check_Z", SKAT_MAIN_Check_Z, ns = "SKAT")
Hello, I encountered a bug/unexpected behavior with the SKAT results when only a single variant is kept for analysis. Instead of results_SKAT$test.snp.mac returning me the single variant's name and value, it only returns its value, thus we can't trace back which variant was used in the test...
I use R version 3.6.0, and SKAT 2.2.5.
Thank you for your attention,
Camille Godi