cdpierse / transformers-interpret

Model explainability that works seamlessly with 🤗 transformers. Explain your transformers model in just 2 lines of code.
Apache License 2.0

change tokenizer parameters #95

Open yusufcakmakk opened 2 years ago

yusufcakmakk commented 2 years ago

Hi,

It works fine when I use SequenceClassificationExplainer with short texts, but for long texts it throws an error like: RuntimeError: The expanded size of the tensor (583) must match the existing size (514) at non-singleton dimension 1. Target sizes: [1, 583]. Tensor sizes: [1, 514]

I think the problem would be solved if I could modify or pass some tokenizer parameters, such as padding="max_length", truncation=True, max_length=max_length, to the explainer.

Do you have any suggestions for this problem? How can I solve it?

Example usage:

from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers_interpret import SequenceClassificationExplainer

model = AutoModelForSequenceClassification.from_pretrained("model_name")
tokenizer = AutoTokenizer.from_pretrained("model_name")

explainer = SequenceClassificationExplainer(model, tokenizer)

example_text = """some long text"""
word_attributions = explainer(example_text)

Exception:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
/tmp/ipykernel_247623/3310833535.py in <module>
      1 example_text = """some long text"""
----> 2 word_attributions = explainer(preprocess(example_text), class_name="riskli")

/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/transformers_interpret/explainers/sequence_classification.py in __call__(self, text, index, class_name, embedding_type, internal_batch_size, n_steps)
    312         if internal_batch_size:
    313             self.internal_batch_size = internal_batch_size
--> 314         return self._run(text, index, class_name, embedding_type=embedding_type)
    315 
    316     def __str__(self):

/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/transformers_interpret/explainers/sequence_classification.py in _run(self, text, index, class_name, embedding_type)
    266         self.text = self._clean_text(text)
    267 
--> 268         self._calculate_attributions(embeddings=embeddings, index=index, class_name=class_name)
    269         return self.word_attributions  # type: ignore
    270 

/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/transformers_interpret/explainers/sequence_classification.py in _calculate_attributions(self, embeddings, index, class_name)
    225 
    226         reference_tokens = [token.replace("Ġ", "") for token in self.decode(self.input_ids)]
--> 227         lig = LIGAttributions(
    228             self._forward,
    229             embeddings,

/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/transformers_interpret/attributions.py in __init__(self, custom_forward, embeddings, tokens, input_ids, ref_input_ids, sep_id, attention_mask, token_type_ids, position_ids, ref_token_type_ids, ref_position_ids, internal_batch_size, n_steps)
     60             )
     61         elif self.position_ids is not None:
---> 62             self._attributions, self.delta = self.lig.attribute(
     63                 inputs=(self.input_ids, self.position_ids),
     64                 baselines=(

/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/captum/log/__init__.py in wrapper(*args, **kwargs)
     33             @wraps(func)
     34             def wrapper(*args, **kwargs):
---> 35                 return func(*args, **kwargs)
     36 
     37             return wrapper

/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/captum/attr/_core/layer/layer_integrated_gradients.py in attribute(self, inputs, baselines, target, additional_forward_args, n_steps, method, internal_batch_size, return_convergence_delta, attribute_to_layer_input)
    363             self.device_ids = getattr(self.forward_func, "device_ids", None)
    364 
--> 365         inputs_layer = _forward_layer_eval(
    366             self.forward_func,
    367             inps,

/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/captum/_utils/gradient.py in _forward_layer_eval(forward_fn, inputs, layer, additional_forward_args, device_ids, attribute_to_layer_input, grad_enabled)
    180     grad_enabled: bool = False,
    181 ) -> Union[Tuple[Tensor, ...], List[Tuple[Tensor, ...]]]:
--> 182     return _forward_layer_eval_with_neuron_grads(
    183         forward_fn,
    184         inputs,

/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/captum/_utils/gradient.py in _forward_layer_eval_with_neuron_grads(forward_fn, inputs, layer, additional_forward_args, gradient_neuron_selector, grad_enabled, device_ids, attribute_to_layer_input)
    443 
    444     with torch.autograd.set_grad_enabled(grad_enabled):
--> 445         saved_layer = _forward_layer_distributed_eval(
    446             forward_fn,
    447             inputs,

/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/captum/_utils/gradient.py in _forward_layer_distributed_eval(forward_fn, inputs, layer, target_ind, additional_forward_args, attribute_to_layer_input, forward_hook_with_return, require_layer_grads)
    292                     single_layer.register_forward_hook(hook_wrapper(single_layer))
    293                 )
--> 294         output = _run_forward(
    295             forward_fn,
    296             inputs,

/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/captum/_utils/common.py in _run_forward(forward_func, inputs, target, additional_forward_args)
    454     additional_forward_args = _format_additional_forward_args(additional_forward_args)
    455 
--> 456     output = forward_func(
    457         *(*inputs, *additional_forward_args)
    458         if additional_forward_args is not None

/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/transformers_interpret/explainers/sequence_classification.py in _forward(self, input_ids, position_ids, attention_mask)
    178 
    179         if self.accepts_position_ids:
--> 180             preds = self.model(
    181                 input_ids,
    182                 position_ids=position_ids,

/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/transformers/models/roberta/modeling_roberta.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict)
   1198         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
   1199 
-> 1200         outputs = self.roberta(
   1201             input_ids,
   1202             attention_mask=attention_mask,

/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

/data/ai-nlp/miniconda3/envs/pl_test/lib/python3.8/site-packages/transformers/models/roberta/modeling_roberta.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)
    814             if hasattr(self.embeddings, "token_type_ids"):
    815                 buffered_token_type_ids = self.embeddings.token_type_ids[:, :seq_length]
--> 816                 buffered_token_type_ids_expanded = buffered_token_type_ids.expand(batch_size, seq_length)
    817                 token_type_ids = buffered_token_type_ids_expanded
    818             else:

RuntimeError: The expanded size of the tensor (583) must match the existing size (514) at non-singleton dimension 1.  Target sizes: [1, 583].  Tensor sizes: [1, 514]
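
In the meantime, the workaround I have in mind is to truncate the text with the tokenizer myself before handing it to the explainer. A rough sketch of that idea (the max_length value here is an assumption about my model's limit, not something taken from the library):

from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers_interpret import SequenceClassificationExplainer

model = AutoModelForSequenceClassification.from_pretrained("model_name")
tokenizer = AutoTokenizer.from_pretrained("model_name")
explainer = SequenceClassificationExplainer(model, tokenizer)

example_text = """some long text"""

# Encode with truncation so the explainer never sees more tokens than the
# model's position embeddings can handle; 510 is an assumed limit that leaves
# room for the special tokens the explainer adds.
truncated_ids = tokenizer.encode(
    example_text,
    add_special_tokens=False,
    truncation=True,
    max_length=510,
)
truncated_text = tokenizer.decode(truncated_ids)

word_attributions = explainer(truncated_text)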
e-tornike commented 1 year ago

Hey @yusufcakmakk,

I stumbled across the same problem. I simply changed some of the parameters to pass max_length to the tokenizer's encode function. See my fork here.

Hope this helps!
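
Roughly, the gist of the change is to let the explainer's encode step pass truncation and a max_length through to the tokenizer. A sketch of the idea (not the exact code from my fork):

# Sketch only: cap the sequence length where the explainer encodes the text,
# so the input ids never exceed the model's position-embedding limit.
def encode(self, text: str) -> list:
    # 512 is an assumed default; the real change would read the limit from
    # the tokenizer/model config or accept it as a parameter.
    return self.tokenizer.encode(
        text,
        add_special_tokens=False,
        truncation=True,
        max_length=512,
    )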

cdpierse commented 1 year ago

Hey @e-tornike, I just looked at your fork and this is great. Would you be interested in adding it as a contribution?

e-tornike commented 1 year ago

Hi @cdpierse, thanks for having a look at this! I've simplified the truncation further and made a pull request.