huggingface / setfit

Efficient few-shot learning with Sentence Transformers
https://hf.co/docs/setfit
Apache License 2.0
2.24k stars 222 forks source link

RuntimeError: Placeholder storage has not been allocated on MPS device! #569

Open agombert opened 2 weeks ago

agombert commented 2 weeks ago

Hello,

Thanks for the great work. I tried to run the introductory example from the README.md:

# Reproduction script: few-shot fine-tuning of a SetFit model on the "sst2"
# dataset (8 sampled examples per class), evaluated once per epoch.
from datasets import load_dataset
from setfit import SetFitModel, Trainer, TrainingArguments, sample_dataset
import torch

import os
# NOTE(review): this variable is set after `import torch` has already run, so
# it may be read too late to have any effect -- confirm that PyTorch honours
# it at all, and if so, set it before the import.
os.environ["PYTORCH_MPS_FORCE_DISABLE"] = "1" # same error with or without this

# Load a dataset from the Hugging Face Hub
dataset = load_dataset("sst2")

# Only the model (below) receives this device; nothing in this script passes
# a device to TrainingArguments or Trainer.
# NOTE(review): the reported error suggests tensors end up on different
# devices (CPU vs. MPS) during training -- presumably the trainer selects its
# own device independently of the model; confirm.
device = torch.device("cpu") # same error with or without this

# Simulate the few-shot regime by sampling 8 examples per class
train_dataset = sample_dataset(dataset["train"], label_column="label", num_samples=8)
# First 100 validation rows are used for evaluation during training.
eval_dataset = dataset["validation"].select(range(100))
# Remaining validation rows are held out; `test_dataset` is not used again in
# this snippet.
test_dataset = dataset["validation"].select(range(100, len(dataset["validation"])))

# Load a SetFit model from Hub
model = SetFitModel.from_pretrained(
    "sentence-transformers/paraphrase-mpnet-base-v2",
    labels=["negative", "positive"],
    device=device
)

# Training configuration: evaluate and checkpoint every epoch, then reload the
# best checkpoint once training finishes.
args = TrainingArguments(
    batch_size=16,
    num_epochs=4,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    metric="accuracy",
    column_mapping={"sentence": "text", "label": "label"}  # Map dataset columns to text/label expected by trainer
)

# This call raises the RuntimeError shown in the traceback that follows.
trainer.train()

And I get this error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[11], line 2
      1 # Train and evaluate
----> 2 trainer.train()

File ~/Library/Caches/pypoetry/virtualenvs/impactai-extraction-paper-5l9T0jAp-py3.11/lib/python3.11/site-packages/setfit/trainer.py:518, in Trainer.train(self, args, trial, **kwargs)
    513 train_parameters = self.dataset_to_parameters(self.train_dataset)
    514 full_parameters = (
    515     train_parameters + self.dataset_to_parameters(self.eval_dataset) if self.eval_dataset else train_parameters
    516 )
--> 518 self.train_embeddings(*full_parameters, args=args)
    519 self.train_classifier(*train_parameters, args=args)

File ~/Library/Caches/pypoetry/virtualenvs/impactai-extraction-paper-5l9T0jAp-py3.11/lib/python3.11/site-packages/setfit/trainer.py:569, in Trainer.train_embeddings(self, x_train, y_train, x_eval, y_eval, args)
    561 if loss in (
    562     losses.BatchAllTripletLoss,
    563     losses.BatchHardTripletLoss,
   (...)
    566     SupConLoss,
    567 ):
    568     self.st_trainer.args.batch_sampler = BatchSamplers.GROUP_BY_LABEL
--> 569 self.st_trainer.train()

File ~/Library/Caches/pypoetry/virtualenvs/impactai-extraction-paper-5l9T0jAp-py3.11/lib/python3.11/site-packages/transformers/trainer.py:1932, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
   1930         hf_hub_utils.enable_progress_bars()
   1931 else:
-> 1932     return inner_training_loop(
   1933         args=args,
   1934         resume_from_checkpoint=resume_from_checkpoint,
   1935         trial=trial,
   1936         ignore_keys_for_eval=ignore_keys_for_eval,
   1937     )

File ~/Library/Caches/pypoetry/virtualenvs/impactai-extraction-paper-5l9T0jAp-py3.11/lib/python3.11/site-packages/transformers/trainer.py:2268, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
   2265     self.control = self.callback_handler.on_step_begin(args, self.state, self.control)
   2267 with self.accelerator.accumulate(model):
-> 2268     tr_loss_step = self.training_step(model, inputs)
   2270 if (
   2271     args.logging_nan_inf_filter
   2272     and not is_torch_xla_available()
   2273     and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step))
   2274 ):
   2275     # if loss is nan or inf simply add the average of previous logged losses
   2276     tr_loss += tr_loss / (1 + self.state.global_step - self._globalstep_last_logged)

File ~/Library/Caches/pypoetry/virtualenvs/impactai-extraction-paper-5l9T0jAp-py3.11/lib/python3.11/site-packages/transformers/trainer.py:3307, in Trainer.training_step(self, model, inputs)
   3304     return loss_mb.reduce_mean().detach().to(self.args.device)
   3306 with self.compute_loss_context_manager():
-> 3307     loss = self.compute_loss(model, inputs)
   3309 del inputs
   3311 kwargs = {}

File ~/Library/Caches/pypoetry/virtualenvs/impactai-extraction-paper-5l9T0jAp-py3.11/lib/python3.11/site-packages/sentence_transformers/trainer.py:348, in SentenceTransformerTrainer.compute_loss(self, model, inputs, return_outputs)
    341 if (
    342     model == self.model_wrapped
    343     and model != self.model  # Only if the model is wrapped
    344     and hasattr(loss_fn, "model")  # Only if the loss stores the model
    345     and loss_fn.model != model  # Only if the wrapped model is not already stored
    346 ):
    347     loss_fn = self.override_model_in_loss(loss_fn, model)
--> 348 loss = loss_fn(features, labels)
    349 if return_outputs:
    350     # During prediction/evaluation, `compute_loss` will be called with `return_outputs=True`.
    351     # However, Sentence Transformer losses do not return outputs, so we return an empty dictionary.
    352     # This does not result in any problems, as the SentenceTransformerTrainingArguments sets
    353     # `prediction_loss_only=True` which means that the output is not used.
    354     return loss, {}

File ~/Library/Caches/pypoetry/virtualenvs/impactai-extraction-paper-5l9T0jAp-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:1736, in Module._wrapped_call_impl(self, *args, **kwargs)
   1734     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1735 else:
-> 1736     return self._call_impl(*args, **kwargs)

File ~/Library/Caches/pypoetry/virtualenvs/impactai-extraction-paper-5l9T0jAp-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:1747, in Module._call_impl(self, *args, **kwargs)
   1742 # If we don't have any hooks, we want to skip the rest of the logic in
   1743 # this function, and just call forward.
   1744 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1745         or _global_backward_pre_hooks or _global_backward_hooks
   1746         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1747     return forward_call(*args, **kwargs)
   1749 result = None
   1750 called_always_called_hooks = set()

File ~/Library/Caches/pypoetry/virtualenvs/impactai-extraction-paper-5l9T0jAp-py3.11/lib/python3.11/site-packages/sentence_transformers/losses/CosineSimilarityLoss.py:78, in CosineSimilarityLoss.forward(self, sentence_features, labels)
     77 def forward(self, sentence_features: Iterable[dict[str, Tensor]], labels: Tensor) -> Tensor:
---> 78     embeddings = [self.model(sentence_feature)["sentence_embedding"] for sentence_feature in sentence_features]
     79     output = self.cos_score_transformation(torch.cosine_similarity(embeddings[0], embeddings[1]))
     80     return self.loss_fct(output, labels.float().view(-1))

File ~/Library/Caches/pypoetry/virtualenvs/impactai-extraction-paper-5l9T0jAp-py3.11/lib/python3.11/site-packages/sentence_transformers/losses/CosineSimilarityLoss.py:78, in <listcomp>(.0)
     77 def forward(self, sentence_features: Iterable[dict[str, Tensor]], labels: Tensor) -> Tensor:
---> 78     embeddings = [self.model(sentence_feature)["sentence_embedding"] for sentence_feature in sentence_features]
     79     output = self.cos_score_transformation(torch.cosine_similarity(embeddings[0], embeddings[1]))
     80     return self.loss_fct(output, labels.float().view(-1))

File ~/Library/Caches/pypoetry/virtualenvs/impactai-extraction-paper-5l9T0jAp-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:1736, in Module._wrapped_call_impl(self, *args, **kwargs)
   1734     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1735 else:
-> 1736     return self._call_impl(*args, **kwargs)

File ~/Library/Caches/pypoetry/virtualenvs/impactai-extraction-paper-5l9T0jAp-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:1747, in Module._call_impl(self, *args, **kwargs)
   1742 # If we don't have any hooks, we want to skip the rest of the logic in
   1743 # this function, and just call forward.
   1744 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1745         or _global_backward_pre_hooks or _global_backward_hooks
   1746         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1747     return forward_call(*args, **kwargs)
   1749 result = None
   1750 called_always_called_hooks = set()

File ~/Library/Caches/pypoetry/virtualenvs/impactai-extraction-paper-5l9T0jAp-py3.11/lib/python3.11/site-packages/sentence_transformers/SentenceTransformer.py:688, in SentenceTransformer.forward(self, input, **kwargs)
    686     module_kwarg_keys = self.module_kwargs.get(module_name, [])
    687     module_kwargs = {key: value for key, value in kwargs.items() if key in module_kwarg_keys}
--> 688     input = module(input, **module_kwargs)
    689 return input

File ~/Library/Caches/pypoetry/virtualenvs/impactai-extraction-paper-5l9T0jAp-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:1736, in Module._wrapped_call_impl(self, *args, **kwargs)
   1734     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1735 else:
-> 1736     return self._call_impl(*args, **kwargs)

File ~/Library/Caches/pypoetry/virtualenvs/impactai-extraction-paper-5l9T0jAp-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:1747, in Module._call_impl(self, *args, **kwargs)
   1742 # If we don't have any hooks, we want to skip the rest of the logic in
   1743 # this function, and just call forward.
   1744 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1745         or _global_backward_pre_hooks or _global_backward_hooks
   1746         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1747     return forward_call(*args, **kwargs)
   1749 result = None
   1750 called_always_called_hooks = set()

File ~/Library/Caches/pypoetry/virtualenvs/impactai-extraction-paper-5l9T0jAp-py3.11/lib/python3.11/site-packages/sentence_transformers/models/Transformer.py:350, in Transformer.forward(self, features, **kwargs)
    347 if "token_type_ids" in features:
    348     trans_features["token_type_ids"] = features["token_type_ids"]
--> 350 output_states = self.auto_model(**trans_features, **kwargs, return_dict=False)
    351 output_tokens = output_states[0]
    353 features.update({"token_embeddings": output_tokens, "attention_mask": features["attention_mask"]})

File ~/Library/Caches/pypoetry/virtualenvs/impactai-extraction-paper-5l9T0jAp-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:1736, in Module._wrapped_call_impl(self, *args, **kwargs)
   1734     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1735 else:
-> 1736     return self._call_impl(*args, **kwargs)

File ~/Library/Caches/pypoetry/virtualenvs/impactai-extraction-paper-5l9T0jAp-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:1747, in Module._call_impl(self, *args, **kwargs)
   1742 # If we don't have any hooks, we want to skip the rest of the logic in
   1743 # this function, and just call forward.
   1744 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1745         or _global_backward_pre_hooks or _global_backward_hooks
   1746         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1747     return forward_call(*args, **kwargs)
   1749 result = None
   1750 called_always_called_hooks = set()

File ~/Library/Caches/pypoetry/virtualenvs/impactai-extraction-paper-5l9T0jAp-py3.11/lib/python3.11/site-packages/transformers/models/mpnet/modeling_mpnet.py:543, in MPNetModel.forward(self, input_ids, attention_mask, position_ids, head_mask, inputs_embeds, output_attentions, output_hidden_states, return_dict, **kwargs)
    540 extended_attention_mask: torch.Tensor = self.get_extended_attention_mask(attention_mask, input_shape)
    542 head_mask = self.get_head_mask(head_mask, self.config.num_hidden_layers)
--> 543 embedding_output = self.embeddings(input_ids=input_ids, position_ids=position_ids, inputs_embeds=inputs_embeds)
    544 encoder_outputs = self.encoder(
    545     embedding_output,
    546     attention_mask=extended_attention_mask,
   (...)
    550     return_dict=return_dict,
    551 )
    552 sequence_output = encoder_outputs[0]

File ~/Library/Caches/pypoetry/virtualenvs/impactai-extraction-paper-5l9T0jAp-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:1736, in Module._wrapped_call_impl(self, *args, **kwargs)
   1734     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1735 else:
-> 1736     return self._call_impl(*args, **kwargs)

File ~/Library/Caches/pypoetry/virtualenvs/impactai-extraction-paper-5l9T0jAp-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:1747, in Module._call_impl(self, *args, **kwargs)
   1742 # If we don't have any hooks, we want to skip the rest of the logic in
   1743 # this function, and just call forward.
   1744 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1745         or _global_backward_pre_hooks or _global_backward_hooks
   1746         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1747     return forward_call(*args, **kwargs)
   1749 result = None
   1750 called_always_called_hooks = set()

File ~/Library/Caches/pypoetry/virtualenvs/impactai-extraction-paper-5l9T0jAp-py3.11/lib/python3.11/site-packages/transformers/models/mpnet/modeling_mpnet.py:101, in MPNetEmbeddings.forward(self, input_ids, position_ids, inputs_embeds, **kwargs)
     98     position_ids = self.position_ids[:, :seq_length]
    100 if inputs_embeds is None:
--> 101     inputs_embeds = self.word_embeddings(input_ids)
    102 position_embeddings = self.position_embeddings(position_ids)
    104 embeddings = inputs_embeds + position_embeddings

File ~/Library/Caches/pypoetry/virtualenvs/impactai-extraction-paper-5l9T0jAp-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:1736, in Module._wrapped_call_impl(self, *args, **kwargs)
   1734     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1735 else:
-> 1736     return self._call_impl(*args, **kwargs)

File ~/Library/Caches/pypoetry/virtualenvs/impactai-extraction-paper-5l9T0jAp-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:1747, in Module._call_impl(self, *args, **kwargs)
   1742 # If we don't have any hooks, we want to skip the rest of the logic in
   1743 # this function, and just call forward.
   1744 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1745         or _global_backward_pre_hooks or _global_backward_hooks
   1746         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1747     return forward_call(*args, **kwargs)
   1749 result = None
   1750 called_always_called_hooks = set()

File ~/Library/Caches/pypoetry/virtualenvs/impactai-extraction-paper-5l9T0jAp-py3.11/lib/python3.11/site-packages/torch/nn/modules/sparse.py:190, in Embedding.forward(self, input)
    189 def forward(self, input: Tensor) -> Tensor:
--> 190     return F.embedding(
    191         input,
    192         self.weight,
    193         self.padding_idx,
    194         self.max_norm,
    195         self.norm_type,
    196         self.scale_grad_by_freq,
    197         self.sparse,
    198     )

File ~/Library/Caches/pypoetry/virtualenvs/impactai-extraction-paper-5l9T0jAp-py3.11/lib/python3.11/site-packages/torch/nn/functional.py:2551, in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
   2545     # Note [embedding_renorm set_grad_enabled]
   2546     # XXX: equivalent to
   2547     # with torch.no_grad():
   2548     #   torch.embedding_renorm_
   2549     # remove once script supports set_grad_enabled
   2550     _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 2551 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)

RuntimeError: Placeholder storage has not been allocated on MPS device!

Environment: Torch version: 2.5.1; MPS available: True; SetFit version: 1.1.0; Transformers version: 4.42.2; ProductName: macOS; ProductVersion: 15.0.1; Architecture: arm64

arianpasquali commented 13 hours ago

Same issue here, with exactly the same environment.