Open allendred opened 9 months ago
from transformers import AutoTokenizer, AutoModel
import torch  # bugfix: torch was used (torch.no_grad, torch.mean) but never imported

tokenizer = AutoTokenizer.from_pretrained('m3e-base/')
model = AutoModel.from_pretrained('m3e-base/')
model.eval()


def get_sentence_embedding(sentence, tokenizer, model):
    """Return a mean-pooled sentence embedding as a NumPy array.

    Mean pooling is weighted by the attention mask so that padding tokens
    do not contribute — this matches the pooling sentence-transformers
    applies, which plain ``torch.mean`` over all positions does not
    (the source of the discrepancy discussed in this thread).

    Args:
        sentence: Input text to embed.
        tokenizer: A Hugging Face tokenizer producing PyTorch tensors.
        model: A Hugging Face encoder model returning hidden states.

    Returns:
        numpy.ndarray: 1-D embedding vector (hidden-size length).
    """
    # tokenizer(...) (instead of .encode) also yields the attention mask
    # needed for mask-aware pooling.
    encoded = tokenizer(sentence, return_tensors='pt')
    with torch.no_grad():
        outputs = model(**encoded)
    # outputs[0] is the last hidden state: (batch=1, seq_len, hidden)
    last_hidden_state = outputs[0]
    # Mask-weighted mean over the sequence dimension; for a single
    # unpadded sentence this equals the simple mean, but it stays correct
    # when padding is present.
    mask = encoded['attention_mask'][0].unsqueeze(-1).to(last_hidden_state.dtype)
    summed = (last_hidden_state[0] * mask).sum(dim=0)
    sentence_embedding = summed / mask.sum().clamp(min=1e-9)
    return sentence_embedding.numpy()
这种方式调用有什么问题么,和sentence-transformer 结果不一样
是不是差的不太多?一般来讲 Mean 需要考虑 padding 的 token,所以需要 Mask
这种方式调用有什么问题么,和sentence-transformer 结果不一样