ModelOriented / shapper

An R wrapper of SHAP python library
https://modeloriented.github.io/shapper/
58 stars 16 forks source link

Add return shap value and exception function. #37

Open eternal-bug opened 3 years ago

eternal-bug commented 3 years ago

Hi there! I have found this package before go die! Thanks, it's very great. It haven't the shap value return function, but in python have. Sometime I want to do analysis with the shap value and exception value. I found it in the source code when I was confused! So I have do little modify to return the shap value. I download the source code, modify and rebuild, and it can work. I modify the individual_variable_effect.R file.

individual_variable_effect.explainer <- function(x,
                                                 new_observation,
                                                 method = "KernelSHAP",
                                                 nsamples = "auto",
                                                 ...) {
  # extracts model, data and predict function from the explainer
  model <- x$model
  data <- x$data
  predict_function <- x$predict_function
  label <- x$label
  if("return_shap_value" %in% names(x)){
    return_shap_value <- x$return_shap_value
  }else{
    return_shap_value <- False
  }

  individual_variable_effect.default(
    model,
    data,
    predict_function,
    new_observation = new_observation,
    label = label,
    method = method,
    nsamples = nsamples,
    return_shap_value = return_shap_value,
    ...
  )
}

#' @importFrom utils tail
#' @export
#' @rdname individual_variable_effect
individual_variable_effect.default <-
  function(x,
           data,
           predict_function = predict,
           new_observation,
           label = tail(class(x), 1),
           method = "KernelSHAP",
           nsamples = "auto",
           return_shap_value = False,
           ...) {
    # check if data correct
    if(!all(colnames(data)==colnames(new_observation))){
      stop("Columns in new obseravtion and data does not match")
    }
    # transform factors to numerics and keep factors' levels
    data_classes <- sapply(data, class)
    factors <- list()
    data_numeric <- data
    for (col in names(data_classes)) {
      if (data_classes[col] == "factor") {
        factors[[col]] <- levels(data[, col])
        data_numeric[, col] <- as.numeric(data_numeric[, col]) - 1
      }
    }

    # force nsamples to be an integer
    if (is.numeric(nsamples))
      nsamples <- as.integer(round(nsamples))

    p_function <- function(new_data) {
      new_data <- as.data.frame(new_data)
      colnames(new_data) <- colnames(data)
      for (col in names(factors)) {
        new_data[, col] <- factor(new_data[, col],
                                  levels = c(0:(length(factors[[col]]) - 1)),
                                  labels = factors[[col]])
      }
      res <- as.data.frame(predict_function(x, new_data))
      if (nrow(res) == 1) {
        res[2, ] <- 0
        res <- r_to_py(res)
        res$drop(res$index[1], inplace = TRUE)
      }
      return(res)
    }
    explainer = shap_reference$KernelExplainer(p_function, data_numeric)

    new_observation_releveled <- new_observation
    new_observation_numeric <- new_observation
    for (col in names(factors)) {
      new_observation_releveled[, col] <-
        factor(new_observation_releveled[, col], levels = factors[[col]])
      new_observation_numeric[, col] <-
        as.numeric(new_observation_releveled[, col]) - 1
    }
    shap_values = explainer$shap_values(new_observation_numeric, nsamples = nsamples)
    expected_value = explainer$expected_value
    if (return_shap_value){
        data <- list()
        data[["shap value"]] <- shap_values
        data[["expected_value"]] <- expected_value
        return(data)
    }

It work. But get the shap value and plot need do two times individual_variable_effect, very low efficiency. There must be a better way to do it. Thanks!