pytorch / captum

Model interpretability and understanding for PyTorch
https://captum.ai
BSD 3-Clause "New" or "Revised" License

AssertionError: Forward hook did not obtain any outputs for given layer #875

Open jjnunez11 opened 2 years ago

jjnunez11 commented 2 years ago

I'm trying to use Captum on an LSTM model. I used similar Captum code with LayerIntegratedGradients (lig) to interpret a CNN and a BERT model, but I get the above error when running it on my trained regularized LSTM.

I would really appreciate any help deciphering this error. I'll paste the error log below, followed by the snippet of Captum code and then the model itself.

Error:

AssertionError                            Traceback (most recent call last)
~\AppData\Local\Temp\5/ipykernel_1852/2630938720.py in <module>
      1 text = "Ms. Jones is a 64 year old woman with an illness."
----> 3 interpret_sentence(model, text, label=0)
      4

~\AppData\Local\Temp\5/ipykernel_1852/1773754268.py in interpret_sentence(model, sentence, min_len, label)
     26
     27     # compute attributions and approximation delta using layer integrated gradients
---> 28     attributions_ig, delta = lig.attribute(input_indices, reference_indices, \
     29                                            n_steps=500, return_convergence_delta=True)
     30     # Replace Label with Text below

~\.conda\envs\scar_nlp\lib\site-packages\captum\log\__init__.py in wrapper(*args, **kwargs)
     33     @wraps(func)
     34     def wrapper(*args, **kwargs):
---> 35         return func(*args, **kwargs)
     36
     37     return wrapper

~\.conda\envs\scar_nlp\lib\site-packages\captum\attr\_core\layer\layer_integrated_gradients.py in attribute(self, inputs, baselines, target, additional_forward_args, n_steps, method, internal_batch_size, return_convergence_delta, attribute_to_layer_input)
    363         self.device_ids = getattr(self.forward_func, "device_ids", None)
    364
--> 365         inputs_layer = _forward_layer_eval(
    366             self.forward_func,
    367             inps,

~\.conda\envs\scar_nlp\lib\site-packages\captum\_utils\gradient.py in _forward_layer_eval(forward_fn, inputs, layer, additional_forward_args, device_ids, attribute_to_layer_input, grad_enabled)
    179     grad_enabled: bool = False,
    180 ) -> Union[Tuple[Tensor, ...], List[Tuple[Tensor, ...]]]:
--> 181     return _forward_layer_eval_with_neuron_grads(
    182         forward_fn,
    183         inputs,

~\.conda\envs\scar_nlp\lib\site-packages\captum\_utils\gradient.py in _forward_layer_eval_with_neuron_grads(forward_fn, inputs, layer, additional_forward_args, gradient_neuron_selector, grad_enabled, device_ids, attribute_to_layer_input)
    442
    443     with torch.autograd.set_grad_enabled(grad_enabled):
--> 444         saved_layer = _forward_layer_distributed_eval(
    445             forward_fn,
    446             inputs,

~\.conda\envs\scar_nlp\lib\site-packages\captum\_utils\gradient.py in _forward_layer_distributed_eval(forward_fn, inputs, layer, target_ind, additional_forward_args, attribute_to_layer_input, forward_hook_with_return, require_layer_grads)
    302
    303     if len(saved_layer) == 0:
--> 304         raise AssertionError("Forward hook did not obtain any outputs for given layer")
    305
    306     if forward_hook_with_return:

AssertionError: Forward hook did not obtain any outputs for given layer

Portion of Captum code

lig = LayerIntegratedGradients(model, model.embed)

def interpret_sentence(model, sentence, min_len=50, label=0):
    text = [tok for tok in tokenizer(sentence.lower())]
    if len(text) < min_len:
        text += [''] * (min_len - len(text))
    indexed = [vocab[t] for t in text]

    model.zero_grad()

    input_indices = torch.tensor(indexed, device=device)
    input_indices = input_indices.unsqueeze(0)

    # input_indices dim: [sequence_length]
    seq_length = min_len

    # predict
    pred = forward_with_sigmoid(input_indices).item()
    pred_ind = round(pred)

    # generate reference indices for each sample
    reference_indices = token_reference.generate_reference(seq_length, device=device).unsqueeze(0)

    # compute attributions and approximation delta using layer integrated gradients
    attributions_ig, delta = lig.attribute(input_indices, reference_indices, \
                                           n_steps=500, return_convergence_delta=True)
    # Replace Label with Text below
    print(itos[pred_ind])
    print('pred: ', itos[pred_ind], '(', '%.2f' % pred, ')', ', delta: ', abs(delta))

    add_attributions_to_visualizer(attributions_ig, text, pred, pred_ind, label, delta, vis_data_records_ig)
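
For context, the other names used in interpret_sentence (tokenizer, vocab, token_reference, forward_with_sigmoid, itos, vis_data_records_ig) are defined elsewhere in my notebook. Roughly, they follow the Captum IMDB tutorial pattern; the sketch below is illustrative only and the exact definitions in my code may differ:

```python
from captum.attr import TokenReferenceBase
from torchtext.data.utils import get_tokenizer

# Sketch only -- mirrors the Captum IMDB tutorial setup, not necessarily the exact code used here.
tokenizer = get_tokenizer('basic_english')   # any callable tokenizer works
# vocab: a torchtext Vocab built from the training corpus (not shown here)

PAD_IND = vocab['']                          # index of the padding token used for min_len padding
token_reference = TokenReferenceBase(reference_token_idx=PAD_IND)

def forward_with_sigmoid(input):
    # the model returns a single logit, so squash it to a probability
    return torch.sigmoid(model(input))
```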

Model

import torch
import torch.nn as nn
import torchtext

import torch.nn.functional as F

from models.lstm.embed_regularize import embedded_dropout
from models.lstm.weight_drop import WeightDrop

Define the model

class LSTM(nn.Module):
    def __init__(self, config):
        super(LSTM, self).__init__()

        self.V = n_vocab

        self.D = config.words_dim    # Vector dimensions
        self.M = config.hidden_dim   # Hidden layer dimension
        self.K = 1                   # Not doing multi-class for now
        self.L = config.num_layers   # Number of layers
        self.device = config.device  # Device, either a CUDA gpu or CPU
        self.is_bidirectional = config.bidirectional
        self.mode = config.mode
        self.embed_droprate = config.embed_droprate  # Embedding droprate
        self.wdrop = config.wdrop

        # Setup Embeddings, whether Pre-trained and fixed
        if self.mode == 'rand':
            rand_embed_init = torch.Tensor(config.vocab_size, config.words_dim).uniform_(-0.25, 0.25)
            self.embed = nn.Embedding.from_pretrained(rand_embed_init, freeze=False)
        elif self.mode == 'static':
            glove = torchtext.vocab.GloVe(
                dim=self.D)  # name="6B", max_vectors=10000 TODO: Options for different vectors
            self.embed = nn.Embedding.from_pretrained(glove.vectors, freeze=True)
        elif self.mode == 'non-static':
            glove = torchtext.vocab.GloVe(
                dim=self.D)  # name="6B", max_vectors=10000 TODO: Options for different vectors
            self.embed = nn.Embedding.from_pretrained(glove.vectors, freeze=False)
        else:
            print("Unsupported Mode")
            exit()

        print(f'self.embed num_embeddings is: {self.embed.num_embeddings} '
              f'while embedding_dim is {self.embed.embedding_dim}')

        # Create model
        self.lstm = nn.LSTM(
            input_size=self.D,
            hidden_size=self.M,
            num_layers=self.L,
            batch_first=True,
            dropout=config.dropout,
            bidirectional=self.is_bidirectional)

        if self.wdrop > 0:
            self.lstm = WeightDrop(self.lstm, ['weight_hh_l0'], dropout=self.wdrop)
        self.dropout = nn.Dropout(config.dropout)

        if self.is_bidirectional:
            self.fc1 = nn.Linear(2 * self.M, self.K)
        else:
            self.fc1 = nn.Linear(self.M, self.K)

        self.dropout = nn.Dropout(config.dropout)

        # Send model to device from init
        self.to(self.device)

    def forward(self, x):
        if self.mode == 'rand':
            out = embedded_dropout(self.embed, x,
                                   dropout=self.embed_droprate if self.training else 0) \
                if self.embed_droprate else self.embed(x)
        elif self.mode == 'static':
            out = embedded_dropout(self.static_embed, x,
                                   dropout=self.embed_droprate if self.training else 0) \
                if self.embed_droprate else self.static_embed(x)
        elif self.mode == 'non-static':
            out = embedded_dropout(self.non_static_embed, x,
                                   dropout=self.embed_droprate if self.training else 0) \
                if self.embed_droprate else self.non_static_embed(x)
        else:
            print("Unsupported Mode")
            exit()

        out, _ = self.lstm(out)

        # max pool
        out, _ = torch.max(out, 1)

        # Dropout
        # Note: will get a warning saying that dropout will not work, but it is triggered
        # above on model creation; we are using dropout down here after the max-pooling layer,
        # so it does have an effect
        out = self.dropout(out)

        # we only want h(T) at the final time step
        out = self.fc1(out)
        return out
NarineK commented 2 years ago

@jjnunez11, the error that you see means that the layer's output was empty. Basically, we were not able to retrieve the output here: https://github.com/pytorch/captum/blob/master/captum/_utils/gradient.py#L277. One way to debug this would be to use LayerActivation for the model.embed layer, see whether you can access that layer's activations, and check what shape the output has. You could also set a forward hook yourself and examine that layer's outputs.
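
For example, a rough sketch of both checks (reusing model, model.embed, and the input_indices tensor from the snippets above):

```python
from captum.attr import LayerActivation

# 1) Ask for the activations of the same layer that LayerIntegratedGradients targets.
layer_act = LayerActivation(model, model.embed)
acts = layer_act.attribute(input_indices)
print(acts.shape)  # expected: [batch_size, seq_len, embedding_dim]

# 2) Or register a forward hook directly and run a plain forward pass.
def debug_hook(module, inputs, output):
    print("model.embed forward hook fired; output shape:", output.shape)

handle = model.embed.register_forward_hook(debug_hook)
model(input_indices)  # if nothing is printed, forward() never calls model.embed itself
handle.remove()
```

If the hook never fires, the embedding module is being bypassed somewhere in forward (e.g. its weight tensor is used directly instead of calling the module), which would explain why the attribution hook sees no outputs.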

jjnunez11 commented 2 years ago

Awesome, thanks so much! I'll give it a try once I submit my thesis.