leelabsg / SKAT

Sequence kernel association test (SKAT)
41 stars 16 forks source link

Variants used name not available when only one variant is used for the test #22

Open CamilleGodi opened 1 month ago

CamilleGodi commented 1 month ago

Hello, I encountered a bug/unexpected behavior with the SKAT results when only a single variant is kept for analysis. Instead of results_SKAT$test.snp.mac returning me the single variant's name and value, it only returns its value, thus we can't trace back which variant was used in the test...

I use R version 3.6.0, and SKAT 2.2.5.

Thank you for your attention,

Camille Godi

CamilleGodi commented 1 month ago

I think I found a temporary fix, for those it may interest. The last two lines are used to change the function in the SKAT package by the "fixed" one when my script is called/used.

### FIX SKAT FUNCTION : ALLOW TO GET VAR NAME WHEN TEST ON SINGLE VARIANT
SKAT_MAIN_Check_Z<-function(Z, n, id_include, SetID, weights, weights.beta, impute.method, is_check_genotype
                            , is_dosage, missing_cutoff, max_maf, estimate_MAF=1, Is.chrX=FALSE, SexVar=NULL, Is.ChrY=FALSE){

  #############################################
  # Check parameters
  varNames <- colnames(Z) ### ADDED

  # changed by SLEE 12/23/2019
  if(!Check_Class(Z, c("matrix", "dgCMatrix"))){
    stop("Z is not a matrix")
  }
  if (nrow(Z)!=n) stop("Dimensions of y and Z do not match")
  if(is_dosage ==TRUE){
    impute.method="fixed"
  }

  if(is.null(colnames(Z))){
    colnames(Z)<-sprintf("VAR%d", 1:ncol(Z))
  }

  #####################################################
  # Check Z

  if(!is_check_genotype && !is_dosage){
    Z.test<-Z[id_include,]

    # changed by SLEE 12/23/2019
    if(!Check_Class(Z.test, c("matrix", "dgCMatrix"))){
      Z.test<-as.matrix(Z.test)
    }
    return(list(Z.test=Z.test,weights=weights, MAF=rep(0, ncol(Z)), return=0) )
  }

  if(estimate_MAF==2){
    Z<-cbind(Z[id_include,])
    id_include<-1:length(id_include)
  }

  ##############################################
  # Check Missing 

  IDX_MISS<-union(which(is.na(Z)),which(Z == 9))
  if(length(IDX_MISS) > 0){
    Z[IDX_MISS]<-NA
  } 

  ###################################################
  # Check missing rates and exclude any SNPs with missing rate > missing_cutoff
  # Also exclude non-polymorphic SNPs
  m = ncol(Z)
  ID_INCLUDE_SNP<-NULL
  MAF_toCutoff<-SKAT_Get_MAF(Z, id_include=NULL, Is.chrX=Is.chrX, SexVar=SexVar)

  # Changed by SLEE, 07/21/2017
  for(i in 1:m){
    missing.ratio<-length(which(is.na(Z[,i])))/n
    sd1<-sd(Z[,i], na.rm=TRUE)
    if(missing.ratio < missing_cutoff && sd1 > 0){
      if(MAF_toCutoff[i] < max_maf){
        ID_INCLUDE_SNP<-c(ID_INCLUDE_SNP,i)
      }
    }

  }

  if(length(ID_INCLUDE_SNP) == 0){

    if(is.null(SetID)){
      msg<-sprintf("ALL SNPs have either high missing rates or no-variation. P-value=1")
    } else {
      msg<-sprintf("In %s, ALL SNPs have either high missing rates or no-variation. P-value=1",SetID )
    }
    warning(msg,call.=FALSE)

    re<-list(p.value = 1, p.value.resampling =NA, Test.Type = NA, Q = NA, param=list(n.marker=0, n.marker.test=0), return=1 ) 
    return(re)        

  } else if(m - length(ID_INCLUDE_SNP) > 0 ){

    if(is.null(SetID)){
      msg<-sprintf("%d SNPs with either high missing rates or no-variation are excluded!", m - length(ID_INCLUDE_SNP))
    } else {
      msg<-sprintf("In %s, %d SNPs with either high missing rates or no-variation are excluded!",SetID, m - length(ID_INCLUDE_SNP) )
    }

    warning(msg,call.=FALSE)    
    Z<-as.matrix(Z[, ID_INCLUDE_SNP])
  }

  ##################################################################
  # doing imputation

  MAF_Org<-SKAT_Get_MAF(Z, id_include=NULL, Is.chrX=Is.chrX, SexVar=SexVar)
  Z<-SKAT_MAIN_Check_Z_Impute(Z, id_include,impute.method, SetID, Is.chrX, SexVar)

  MAF<-SKAT_Get_MAF(Z, id_include=NULL, Is.chrX=Is.chrX, SexVar=SexVar)
  MAF1<-SKAT_Get_MAF(Z, id_include=id_include, Is.chrX=Is.chrX, SexVar=SexVar)

  ###########################################
  # Check non-polymorphic
  if(length(which(MAF1 > 0)) == 0){

    if(is.null(SetID)){
      msg<-sprintf("No polymorphic SNP. P-value = 1" )
    } else {
      msg<-sprintf("In %s, No polymorphic SNP. P-value = 1",SetID )
    }
    warning(msg,call.=FALSE)
    re<-list(p.value = 1, p.value.resampling =NA, Test.Type = NA, Q = NA, param=list(n.marker=0, n.marker.test=0), return=1 )   
    return(re)
  }

  ##########################################
  # Get Weights

  if(is.null(weights)){
    weights<-Beta.Weights(MAF,weights.beta)
  } else {
    weights = weights[ID_INCLUDE_SNP]
  }

  ###########################################
  # Check missing of y and X

  if(n - length(id_include)  > 0){

    id_Z<-which(MAF1 > 0)

    if(length(id_Z) == 0){

      if(is.null(SetID)){
        msg<-sprintf("No polymorphic SNP. P-value = 1" )
      } else {
        msg<-sprintf("In %s, No polymorphic SNP. P-value = 1",SetID )
      }
      warning(msg,call.=FALSE)
      re<-list(p.value = 1, p.value.resampling =NA, Test.Type = NA, Q = NA, param=list(n.marker=0, n.marker.test=0), return=1 )   

    } else if (length(id_Z) == 1){
      Z<-cbind(Z[,id_Z])
    } else {
      Z<-Z[,id_Z]
    }

    if(!is.null(weights)){
      weights<-weights[id_Z]
    }

  } 

  if( dim(Z)[2] == 1){

    if(is.null(SetID)){
      msg<-sprintf("Only one SNP in the SNP set!" )
    } else {
      msg<-sprintf("In %s, Only one SNP in the SNP set!"
                   ,SetID )
    }
    # Suppress this warning
    #warning(msg,call.=FALSE)

    Z.test<-as.matrix(Z[id_include,])

  } else {

    Z.test<-Z[id_include,]

  }

  ### ADDED
  if ( ncol(Z.test) == 1 ) {
    colnames(Z.test) <- varNames[ID_INCLUDE_SNP] # FIX VARIANT NAME !
  }
  ### END ADDITION

  return(list(Z.test=Z.test,weights=weights, MAF=MAF_Org, id_include.test=id_include, return=0) )

}

environment(SKAT_MAIN_Check_Z) <- asNamespace('SKAT')
assignInNamespace("SKAT_MAIN_Check_Z", SKAT_MAIN_Check_Z, ns = "SKAT")