thank you for the excellent work! I am trying to generate SHAP values for a model with two input branches: One LSTM branch that ingests sequential data (3D array) and one that ingests non-sequential data (2D array).
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
/tmp/ipykernel_26501/3289302125.py in <module>
----> 1 shap_values = ex.shap_values([X_seq_test[0:10,:,X_seq_idx], X_mom_test[0:10,X_mom_idx]])
/opt/conda/lib/python3.7/site-packages/shap/explainers/_deep/__init__.py in shap_values(self, X, ranked_outputs, output_rank_order, check_additivity)
122 were chosen as "top".
123 """
--> 124 return self.explainer.shap_values(X, ranked_outputs, output_rank_order, check_additivity=check_additivity)
/opt/conda/lib/python3.7/site-packages/shap/explainers/_deep/deep_tf.py in shap_values(self, X, ranked_outputs, output_rank_order, check_additivity)
306 # run attribution computation graph
307 feature_ind = model_output_ranks[j,i]
--> 308 sample_phis = self.run(self.phi_symbolic(feature_ind), self.model_inputs, joint_input)
309
310 # assign the attributions to the right part of the output arrays
/opt/conda/lib/python3.7/site-packages/shap/explainers/_deep/deep_tf.py in run(self, out, model_inputs, X)
363
364 return final_out
--> 365 return self.execute_with_overridden_gradients(anon)
366
367 def custom_grad(self, op, *grads):
/opt/conda/lib/python3.7/site-packages/shap/explainers/_deep/deep_tf.py in execute_with_overridden_gradients(self, f)
399 # define the computation graph for the attribution values using a custom gradient-like computation
400 try:
--> 401 out = f()
402 finally:
403 # reinstate the backpropagatable check
/opt/conda/lib/python3.7/site-packages/shap/explainers/_deep/deep_tf.py in anon()
359 v = tf.constant(data, dtype=self.model_inputs[i].dtype)
360 inputs.append(v)
--> 361 final_out = out(inputs)
362 tf_execute.record_gradient = tf_backprop._record_gradient
363
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
778 else:
779 compiler = "nonXla"
--> 780 result = self._call(*args, **kwds)
781
782 new_tracing_count = self._get_tracing_count()
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
812 # In this case we have not created variables on the first call. So we can
813 # run the first trace but we should fail if variables are created.
--> 814 results = self._stateful_fn(*args, **kwds)
815 if self._created_variables:
816 raise ValueError("Creating variables on a non-first call to a function"
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in __call__(self, *args, **kwargs)
2826 """Calls a graph function specialized to the inputs."""
2827 with self._lock:
--> 2828 graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
2829 return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access
2830
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs)
3211
3212 self._function_cache.missed.add(call_context_key)
-> 3213 graph_function = self._create_graph_function(args, kwargs)
3214 self._function_cache.primary[cache_key] = graph_function
3215 return graph_function, args, kwargs
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
3073 arg_names=arg_names,
3074 override_flat_arg_shapes=override_flat_arg_shapes,
-> 3075 capture_by_value=self._capture_by_value),
3076 self._function_attributes,
3077 function_spec=self.function_spec,
/opt/conda/lib/python3.7/site-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
984 _, original_func = tf_decorator.unwrap(python_func)
985
--> 986 func_outputs = python_func(*func_args, **func_kwargs)
987
988 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in wrapped_fn(*args, **kwds)
598 # __wrapped__ allows AutoGraph to swap in a converted function. We give
599 # the function a weak reference to itself to avoid a reference cycle.
--> 600 return weak_wrapped_fn().__wrapped__(*args, **kwds)
601 weak_wrapped_fn = weakref.ref(wrapped_fn)
602
/opt/conda/lib/python3.7/site-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
971 except Exception as e: # pylint:disable=broad-except
972 if hasattr(e, "ag_error_metadata"):
--> 973 raise e.ag_error_metadata.to_exception(e)
974 else:
975 raise
AttributeError: in user code:
/opt/conda/lib/python3.7/site-packages/shap/explainers/_deep/deep_tf.py:243 grad_graph *
out = self.model(shap_rAnD)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py:985 __call__ **
outputs = call_fn(inputs, *args, **kwargs)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/functional.py:386 call
inputs, training=training, mask=mask)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/functional.py:508 _run_internal_graph
outputs = node.layer(*args, **kwargs)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/layers/recurrent.py:659 __call__
return super(RNN, self).__call__(inputs, **kwargs)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py:985 __call__
outputs = call_fn(inputs, *args, **kwargs)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/layers/recurrent_v2.py:1183 call
runtime) = lstm_with_backend_selection(**normal_lstm_kwargs)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/layers/recurrent_v2.py:1559 lstm_with_backend_selection
function.register(defun_gpu_lstm, **params)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py:3241 register
concrete_func.add_gradient_functions_to_graph()
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py:2063 add_gradient_functions_to_graph
self._delayed_rewrite_functions.forward_backward())
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py:621 forward_backward
forward, backward = self._construct_forward_backward(num_doutputs)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py:669 _construct_forward_backward
func_graph=backwards_graph)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/framework/func_graph.py:986 func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py:659 _backprop_function
src_graph=self._func_graph)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/ops/gradients_util.py:669 _GradientsHelper
lambda: grad_fn(op, *out_grads))
/opt/conda/lib/python3.7/site-packages/tensorflow/python/ops/gradients_util.py:336 _MaybeCompile
return grad_fn() # Exit early
/opt/conda/lib/python3.7/site-packages/tensorflow/python/ops/gradients_util.py:669 <lambda>
lambda: grad_fn(op, *out_grads))
/opt/conda/lib/python3.7/site-packages/shap/explainers/_deep/deep_tf.py:371 custom_grad
out = op_handlers[type_name](self, op, *grads) # we cut off the shap_ prefex before the lookup
/opt/conda/lib/python3.7/site-packages/shap/explainers/_deep/deep_tf.py:660 handler
return linearity_with_excluded_handler(input_inds, explainer, op, *grads)
/opt/conda/lib/python3.7/site-packages/shap/explainers/_deep/deep_tf.py:667 linearity_with_excluded_handler
assert not explainer._variable_inputs(op)[i], str(i) + "th input to " + op.name + " cannot vary!"
/opt/conda/lib/python3.7/site-packages/shap/explainers/_deep/deep_tf.py:220 _variable_inputs
out[i] = t.name in self.between_tensors
AttributeError: 'TFDeep' object has no attribute 'between_tensors'
I am not sure if multi-branch network architectures like the one I am using are currently supported. If this is not the case, would you be so kind as to point me to a workaround for this problem?
Hi there,
thank you for the excellent work! I am trying to generate SHAP values for a model with two input branches: One LSTM branch that ingests sequential data (3D array) and one that ingests non-sequential data (2D array).
The model builder looks like this:
Training the model works well:
Now, when I try to generate SHAP values I am getting an error related to the dimensionality of the two input arrays:
The last line is throwing the error:
I am not sure if multi-branch network architectures like the one I am using are currently supported. If this is not the case, would you be so kind as to point me to a workaround for this problem?