Open aoezdTchibo opened 1 month ago
When I use the example from the README with "intfloat/multilingual-e5-small" that is based on sentence-transformers, I receive the following error:
from fast_sentence_transformers import FastSentenceTransformer as SentenceTransformer

encoder = SentenceTransformer("intfloat/multilingual-e5-small", device="cpu")
encoder.encode("Hello hello, hey, hello hello")
AttributeError Traceback (most recent call last) Cell In[56], line 5 1 from fast_sentence_transformers import FastSentenceTransformer as SentenceTransformer 3 encoder = SentenceTransformer("intfloat/multilingual-e5-small", device="cpu") ----> 5 encoder.encode("Hello hello, hey, hello hello") File ~/.venv/lib/python3.11/site-packages/fast_sentence_transformers/fast_sentence_transformers.py:167, in FastSentenceTransformer.encode(self, text, convert_to_numpy) 164 text = [text] 165 single_input = True --> 167 prediction = self.pipeline(text) 168 if convert_to_numpy: 169 prediction = np.array([pred.cpu().detach().numpy() for pred in prediction]) File ~/.venv/lib/python3.11/site-packages/transformers/pipelines/base.py:1238, in Pipeline.__call__(self, inputs, num_workers, batch_size, *args, **kwargs) 1234 if can_use_iterator: 1235 final_iterator = self.get_iterator( 1236 inputs, num_workers, batch_size, preprocess_params, forward_params, postprocess_params 1237 ) -> 1238 outputs = list(final_iterator) 1239 return outputs 1240 else: File ~/.venv/lib/python3.11/site-packages/transformers/pipelines/pt_utils.py:124, in PipelineIterator.__next__(self) 121 return self.loader_batch_item() 123 # We're out of items within a batch --> 124 item = next(self.iterator) 125 processed = self.infer(item, **self.params) 126 # We now have a batch of "inferred things". File ~/.venv/lib/python3.11/site-packages/transformers/pipelines/pt_utils.py:125, in PipelineIterator.__next__(self) 123 # We're out of items within a batch 124 item = next(self.iterator) --> 125 processed = self.infer(item, **self.params) 126 # We now have a batch of "inferred things". 
127 if self.loader_batch_size is not None: 128 # Try to infer the size of the batch File ~/.venv/lib/python3.11/site-packages/transformers/pipelines/base.py:1164, in Pipeline.forward(self, model_inputs, **forward_params) 1162 with inference_context(): 1163 model_inputs = self._ensure_tensor_on_device(model_inputs, device=self.device) -> 1164 model_outputs = self._forward(model_inputs, **forward_params) 1165 model_outputs = self._ensure_tensor_on_device(model_outputs, device=torch.device("cpu")) 1166 else: File ~/.venv/lib/python3.11/site-packages/fast_sentence_transformers/fast_sentence_transformers.py:47, in _SentenceEmbeddingPipeline._forward(self, model_inputs) 37 def _forward(self, model_inputs: dict) -> dict: 38 """ 39 Perform forward pass on the model. 40 (...) 45 dict: The model outputs. 46 """ ---> 47 outputs = self.model(**model_inputs) 48 return { 49 "outputs": outputs, 50 "attention_mask": model_inputs["attention_mask"], 51 } File ~/.venv/lib/python3.11/site-packages/optimum/modeling_base.py:99, in OptimizedModel.__call__(self, *args, **kwargs) 98 def __call__(self, *args, **kwargs): ---> 99 return self.forward(*args, **kwargs) File ~/.venv/lib/python3.11/site-packages/optimum/onnxruntime/modeling_ort.py:1108, in ORTModelForFeatureExtraction.forward(self, input_ids, attention_mask, token_type_ids, **kwargs) 1105 else: 1106 model_inputs = {"input_ids": input_ids, "attention_mask": attention_mask, "token_type_ids": token_type_ids} -> 1108 onnx_inputs = self._prepare_onnx_inputs(use_torch, **model_inputs) 1109 onnx_outputs = self.model.run(None, onnx_inputs) 1110 model_outputs = self._prepare_onnx_outputs(use_torch, *onnx_outputs) File ~/.venv/lib/python3.11/site-packages/optimum/onnxruntime/modeling_ort.py:941, in ORTModel._prepare_onnx_inputs(self, use_torch, **inputs) 938 onnx_inputs[input_name] = inputs.pop(input_name) 940 if use_torch: --> 941 onnx_inputs[input_name] = onnx_inputs[input_name].cpu().detach().numpy() 943 if onnx_inputs[input_name].dtype 
!= self.input_dtypes[input_name]: 944 onnx_inputs[input_name] = onnx_inputs[input_name].astype( 945 TypeHelper.ort_type_to_numpy_type(self.input_dtypes[input_name]) 946 ) AttributeError: 'NoneType' object has no attribute 'cpu'
When I use the example from the README with "intfloat/multilingual-e5-small" that is based on sentence-transformers, I receive the following error: