huggingface / setfit

Efficient few-shot learning with Sentence Transformers
https://hf.co/docs/setfit
Apache License 2.0
2.23k stars 220 forks source link

error export onnx with body roberta #338

Open batman-do opened 1 year ago

batman-do commented 1 year ago

help me fix issue @tomaarsen , thanks u guys

onnx==1.13
onnxruntime==1.14
pytorch==2.0.0+cuda117

image

image

IndexError                                Traceback (most recent call last)
Cell In[7], line 1
----> 1 export_onnx(model.model_body, model.model_head, opset=18, output_path="models/base.onnx")

File /storage/dodx/anaconda3/envs/pipeline_sentiment/lib/python3.8/site-packages/setfit/exporters/onnx.py:227, in export_onnx(model_body, model_head, opset, output_path, ignore_ir_version, use_hummingbird)
    225 if issubclass(type(model_head), models.Dense):
    226     setfit_model = OnnxSetFitModel(transformer, lambda x: model_pooler(x)["sentence_embedding"], model_head).cpu()
--> 227     export_onnx_setfit_model(setfit_model, dummy_inputs, output_path, opset)
    229     # store meta data of the tokenizer for getting the correct tokenizer during inference
    230     onnx_setfit_model = onnx.load(output_path)

File /storage/dodx/anaconda3/envs/pipeline_sentiment/lib/python3.8/site-packages/setfit/exporters/onnx.py:97, in export_onnx_setfit_model(setfit_model, inputs, output_path, opset)
     95 setfit_model.eval()
     96 with torch.no_grad():
---> 97     torch.onnx.export(
     98         setfit_model,
     99         args=args,
    100         f=output_path,
    101         opset_version=opset,
    102         input_names=["input_ids", "attention_mask", "token_type_ids"],
    103         output_names=output_names,
    104         dynamic_axes={**dynamic_axes_input, **dynamic_axes_output},
    105     )

File /storage/dodx/anaconda3/envs/pipeline_sentiment/lib/python3.8/site-packages/torch/onnx/utils.py:506, in export(model, args, f, export_params, verbose, training, input_names, output_names, operator_export_type, opset_version, do_constant_folding, dynamic_axes, keep_initializers_as_inputs, custom_opsets, export_modules_as_functions)
    188 @_beartype.beartype
    189 def export(
    190     model: Union[torch.nn.Module, torch.jit.ScriptModule, torch.jit.ScriptFunction],
   (...)
    206     export_modules_as_functions: Union[bool, Collection[Type[torch.nn.Module]]] = False,
    207 ) -> None:
    208     r"""Exports a model into ONNX format.
    209 
    210     If ``model`` is not a :class:`torch.jit.ScriptModule` nor a
   (...)
    503             All errors are subclasses of :class:`errors.OnnxExporterError`.
    504     """
--> 506     _export(
    507         model,
    508         args,
    509         f,
    510         export_params,
    511         verbose,
    512         training,
    513         input_names,
    514         output_names,
    515         operator_export_type=operator_export_type,
    516         opset_version=opset_version,
    517         do_constant_folding=do_constant_folding,
    518         dynamic_axes=dynamic_axes,
    519         keep_initializers_as_inputs=keep_initializers_as_inputs,
    520         custom_opsets=custom_opsets,
    521         export_modules_as_functions=export_modules_as_functions,
    522     )

File /storage/dodx/anaconda3/envs/pipeline_sentiment/lib/python3.8/site-packages/torch/onnx/utils.py:1548, in _export(model, args, f, export_params, verbose, training, input_names, output_names, operator_export_type, export_type, opset_version, do_constant_folding, dynamic_axes, keep_initializers_as_inputs, fixed_batch_size, custom_opsets, add_node_names, onnx_shape_inference, export_modules_as_functions)
   1545     dynamic_axes = {}
   1546 _validate_dynamic_axes(dynamic_axes, model, input_names, output_names)
-> 1548 graph, params_dict, torch_out = _model_to_graph(
   1549     model,
   1550     args,
   1551     verbose,
   1552     input_names,
   1553     output_names,
   1554     operator_export_type,
   1555     val_do_constant_folding,
   1556     fixed_batch_size=fixed_batch_size,
   1557     training=training,
   1558     dynamic_axes=dynamic_axes,
   1559 )
   1561 # TODO: Don't allocate a in-memory string for the protobuf
   1562 defer_weight_export = (
   1563     export_type is not _exporter_states.ExportTypes.PROTOBUF_FILE
   1564 )

File /storage/dodx/anaconda3/envs/pipeline_sentiment/lib/python3.8/site-packages/torch/onnx/utils.py:1113, in _model_to_graph(model, args, verbose, input_names, output_names, operator_export_type, do_constant_folding, _disable_torch_constant_prop, fixed_batch_size, training, dynamic_axes)
   1110     args = (args,)
   1112 model = _pre_trace_quant_model(model, args)
-> 1113 graph, params, torch_out, module = _create_jit_graph(model, args)
   1114 params_dict = _get_named_param_dict(graph, params)
   1116 try:

File /storage/dodx/anaconda3/envs/pipeline_sentiment/lib/python3.8/site-packages/torch/onnx/utils.py:989, in _create_jit_graph(model, args)
    984     graph = _C._propagate_and_assign_input_shapes(
    985         graph, flattened_args, param_count_list, False, False
    986     )
    987     return graph, params, torch_out, None
--> 989 graph, torch_out = _trace_and_get_graph_from_model(model, args)
    990 _C._jit_pass_onnx_lint(graph)
    991 state_dict = torch.jit._unique_state_dict(model)

File /storage/dodx/anaconda3/envs/pipeline_sentiment/lib/python3.8/site-packages/torch/onnx/utils.py:893, in _trace_and_get_graph_from_model(model, args)
    891 prev_autocast_cache_enabled = torch.is_autocast_cache_enabled()
    892 torch.set_autocast_cache_enabled(False)
--> 893 trace_graph, torch_out, inputs_states = torch.jit._get_trace_graph(
    894     model,
    895     args,
    896     strict=False,
    897     _force_outplace=False,
    898     _return_inputs_states=True,
    899 )
    900 torch.set_autocast_cache_enabled(prev_autocast_cache_enabled)
    902 warn_on_static_input_change(inputs_states)

File /storage/dodx/anaconda3/envs/pipeline_sentiment/lib/python3.8/site-packages/torch/jit/_trace.py:1268, in _get_trace_graph(f, args, kwargs, strict, _force_outplace, return_inputs, _return_inputs_states)
   1266 if not isinstance(args, tuple):
   1267     args = (args,)
-> 1268 outs = ONNXTracedModule(f, strict, _force_outplace, return_inputs, _return_inputs_states)(*args, **kwargs)
   1269 return outs

File /storage/dodx/anaconda3/envs/pipeline_sentiment/lib/python3.8/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File /storage/dodx/anaconda3/envs/pipeline_sentiment/lib/python3.8/site-packages/torch/jit/_trace.py:127, in ONNXTracedModule.forward(self, *args)
    124     else:
    125         return tuple(out_vars)
--> 127 graph, out = torch._C._create_graph_by_tracing(
    128     wrapper,
    129     in_vars + module_state,
    130     _create_interpreter_name_lookup_fn(),
    131     self.strict,
    132     self._force_outplace,
    133 )
    135 if self._return_inputs:
    136     return graph, outs[0], ret_inputs[0]

File /storage/dodx/anaconda3/envs/pipeline_sentiment/lib/python3.8/site-packages/torch/jit/_trace.py:118, in ONNXTracedModule.forward.<locals>.wrapper(*args)
    116 if self._return_inputs_states:
    117     inputs_states.append(_unflatten(in_args, in_desc))
--> 118 outs.append(self.inner(*trace_inputs))
    119 if self._return_inputs_states:
    120     inputs_states[0] = (inputs_states[0], trace_inputs)

File /storage/dodx/anaconda3/envs/pipeline_sentiment/lib/python3.8/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File /storage/dodx/anaconda3/envs/pipeline_sentiment/lib/python3.8/site-packages/torch/nn/modules/module.py:1488, in Module._slow_forward(self, *input, **kwargs)
   1486         recording_scopes = False
   1487 try:
-> 1488     result = self.forward(*input, **kwargs)
   1489 finally:
   1490     if recording_scopes:

File /storage/dodx/anaconda3/envs/pipeline_sentiment/lib/python3.8/site-packages/setfit/exporters/onnx.py:51, in OnnxSetFitModel.forward(self, input_ids, attention_mask, token_type_ids)
     50 def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, token_type_ids: torch.Tensor):
---> 51     hidden_states = self.model_body(input_ids, attention_mask, token_type_ids)
     52     hidden_states = {"token_embeddings": hidden_states[0], "attention_mask": attention_mask}
     54     embeddings = self.pooler(hidden_states)

File /storage/dodx/anaconda3/envs/pipeline_sentiment/lib/python3.8/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File /storage/dodx/anaconda3/envs/pipeline_sentiment/lib/python3.8/site-packages/torch/nn/modules/module.py:1488, in Module._slow_forward(self, *input, **kwargs)
   1486         recording_scopes = False
   1487 try:
-> 1488     result = self.forward(*input, **kwargs)
   1489 finally:
   1490     if recording_scopes:

File /storage/dodx/anaconda3/envs/pipeline_sentiment/lib/python3.8/site-packages/transformers/models/roberta/modeling_roberta.py:845, in RobertaModel.forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)
    838 # Prepare head mask if needed
    839 # 1.0 in head_mask indicate we keep the head
    840 # attention_probs has shape bsz x n_heads x N x N
    841 # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
    842 # and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
    843 head_mask = self.get_head_mask(head_mask, self.config.num_hidden_layers)
--> 845 embedding_output = self.embeddings(
    846     input_ids=input_ids,
    847     position_ids=position_ids,
    848     token_type_ids=token_type_ids,
    849     inputs_embeds=inputs_embeds,
    850     past_key_values_length=past_key_values_length,
    851 )
    852 encoder_outputs = self.encoder(
    853     embedding_output,
    854     attention_mask=extended_attention_mask,
   (...)
    862     return_dict=return_dict,
    863 )
    864 sequence_output = encoder_outputs[0]

File /storage/dodx/anaconda3/envs/pipeline_sentiment/lib/python3.8/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File /storage/dodx/anaconda3/envs/pipeline_sentiment/lib/python3.8/site-packages/torch/nn/modules/module.py:1488, in Module._slow_forward(self, *input, **kwargs)
   1486         recording_scopes = False
   1487 try:
-> 1488     result = self.forward(*input, **kwargs)
   1489 finally:
   1490     if recording_scopes:

File /storage/dodx/anaconda3/envs/pipeline_sentiment/lib/python3.8/site-packages/transformers/models/roberta/modeling_roberta.py:124, in RobertaEmbeddings.forward(self, input_ids, token_type_ids, position_ids, inputs_embeds, past_key_values_length)
    122 if inputs_embeds is None:
    123     inputs_embeds = self.word_embeddings(input_ids)
--> 124 token_type_embeddings = self.token_type_embeddings(token_type_ids)
    126 embeddings = inputs_embeds + token_type_embeddings
    127 if self.position_embedding_type == "absolute":

File /storage/dodx/anaconda3/envs/pipeline_sentiment/lib/python3.8/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File /storage/dodx/anaconda3/envs/pipeline_sentiment/lib/python3.8/site-packages/torch/nn/modules/module.py:1488, in Module._slow_forward(self, *input, **kwargs)
   1486         recording_scopes = False
   1487 try:
-> 1488     result = self.forward(*input, **kwargs)
   1489 finally:
   1490     if recording_scopes:

File /storage/dodx/anaconda3/envs/pipeline_sentiment/lib/python3.8/site-packages/torch/nn/modules/sparse.py:162, in Embedding.forward(self, input)
    161 def forward(self, input: Tensor) -> Tensor:
--> 162     return F.embedding(
    163         input, self.weight, self.padding_idx, self.max_norm,
    164         self.norm_type, self.scale_grad_by_freq, self.sparse)

File /storage/dodx/anaconda3/envs/pipeline_sentiment/lib/python3.8/site-packages/torch/nn/functional.py:2210, in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
   2204     # Note [embedding_renorm set_grad_enabled]
   2205     # XXX: equivalent to
   2206     # with torch.no_grad():
   2207     #   torch.embedding_renorm_
   2208     # remove once script supports set_grad_enabled
   2209     _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 2210 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)

IndexError: index out of range in self
tomaarsen commented 1 year ago

I have no experience with ONNX myself, so I can't really comment on what could cause this. However, perhaps @nbertagnolli has some clue of what this could be? That is, if he has some time :)

nbertagnolli commented 1 year ago

Happy to take a look : ). @batman-do could you send over a small working example and I'll poke around and see what I can find?

batman-do commented 1 year ago

I use setfit version 0.6.0 and the export_onnx function from the repo. Exporting to ONNX by following the examples works with the checkpoint sentence-transformers/paraphrase-albert-small-v2, but it does not work with my checkpoint that has the roberta body shown above, nor with sentence-transformers/paraphrase-multilingual-mpnet-base-v2. @nbertagnolli

rolshoven commented 1 year ago

I was running into the same error with a roberta-based model body. After some debugging I noticed the following:

In setfit/exporters/onnx.py(51) forward() we have a token_type_ids tensor that looks like this:

tensor([[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

And an attention_mask tensor that looks like this:

tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In my opinion, these tensors look as if they were switched. The error then occurs because we are trying to look up both the token_type_id embedding with index 0 and the one with index 1, but there is only one embedding in the matrix (at least for the model stsb-xlm-roberta-base). I created a pull request but without testing the solution for any other model yet. I will do so once I have some more time. For my use case, it is now working. I made a pull request with my changes. If you have the same issue, this should hopefully get it working again 😃

andreeapricopi commented 1 year ago

I ran into this problem too with sentence-transformers/paraphrase-multilingual-mpnet-base-v2, and can also confirm that, as @rolshoven said, the 2 tensors in setfit/exporters/onnx.py(51) seem switched. The changes in the PR solved that issue for me, too. However, I also get a different error when using distiluse-base-multilingual-cased-v2, namely: RuntimeError: The size of tensor a (12) must match the size of tensor b (128) at non-singleton dimension 1.

I'm adding a short code snippet here to reproduce the issues (inspired by one of @nbertagnolli's replies).

from sentence_transformers import SentenceTransformer
from setfit import SetFitHead, SetFitModel
from setfit.exporters.onnx import export_onnx

# Minimal reproduction: wrap a pretrained SentenceTransformer body and a newly
# constructed SetFitHead in a SetFitModel, then export both parts to ONNX.

# Uncomment for different scenarios
model_id = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"  # Not working, "index out of range"
# model_id = "sentence-transformers/distiluse-base-multilingual-cased-v2"  # Not working, tensor sizes error
# model_id = "sentence-transformers/all-MiniLM-L6-v2"  # Working

# Load pretrained transformer and create a head
model_body = SentenceTransformer(model_id)
# The head's input width is taken from the body's sentence embedding size.
model_head = SetFitHead(
    in_features=model_body.get_sentence_embedding_dimension(),
    out_features=4,
)
model = SetFitModel(model_body=model_body, model_head=model_head)

# Export model
output_path = "setfit_onnx"

export_onnx(
    model.model_body,
    model.model_head,
    opset=12,
    output_path=output_path,
)
lucasalvarezlacasa commented 6 months ago

Any updates on this? I'm facing the same issue but for the instructor-large model. There seems to be an error when loading the attention weights.

geraldstanje commented 5 months ago

@lucasalvarezlacasa how do I configure export_onnx for GPU and CUDA with sentence-transformers/all-MiniLM-L6-v2, for both the model head and the model body?