Open tanutarou opened 11 months ago
I have the same problem. How can I solve it?
+1
I have the same problem!
I re-wrote the function `_forward_mlm` in generate.py as below and the bug was solved. I am not sure whether this is the correct way or whether it affects the results; waiting for the official correction.
def _forward_mlm(words):
    """Score each word against all templates with the MLM and keep the best fill.

    For every input word, each template is filled with the word, the whole
    batch is scored by the masked language model, and the top-k predictions
    at the masked positions are inspected.  Per sentence, the k-column whose
    tokens have the highest mean probability fills the masks; the
    best-scoring sentence for the word is kept.

    Relies on module-level globals (`templates`, `tokenizer`, `model`,
    `device`, `k`, `PAD_TOKEN_ID`) defined elsewhere in generate.py.

    Returns:
        dict with "output_phrase" (best decoded phrase per word) and
        "prob" (mean token probability of that phrase's filled tokens).
    """
    best_phrases = []
    best_scores = []
    for word in words:
        # Build one phrase per template and score the whole batch in one pass.
        phrases = [tpl.format(word) for tpl in templates]
        enc = tokenizer(phrases, return_tensors="pt", padding=True).to(device)
        ids = enc["input_ids"]
        logits = model(**enc).logits
        probs = torch.softmax(logits, dim=-1).detach()
        topk = probs.topk(k=k, dim=-1)

        candidate_phrases = []
        candidate_scores = []
        for row in range(probs.shape[0]):
            # NOTE(review): masked positions are located via PAD_TOKEN_ID —
            # confirm this is intended (mask token vs. pad token).
            masked = ids[row] == PAD_TOKEN_ID
            pos = torch.arange(masked.sum())
            cand_ids = topk.indices[row, masked]
            cand_probs = topk.values[row, masked]
            # Pick the k-column whose mean probability across masks is highest.
            choice = cand_probs.mean(dim=1).argmax()
            filled = torch.clone(ids[row])
            filled[masked] = cand_ids[pos, choice]
            candidate_phrases.append(
                tokenizer.decode(filled, skip_special_tokens=True)
            )
            # Mean probability of the tokens actually selected.
            candidate_scores.append(cand_probs[pos, choice].mean().item())
        # Keep the template whose filled-in tokens scored best on average.
        winner = np.argmax(candidate_scores)
        best_phrases.append(candidate_phrases[winner])
        best_scores.append(candidate_scores[winner])
    return {"prob": best_scores, "output_phrase": best_phrases}
I re-wrote the function `_forward_mlm` in generate.py as below and the bug was solved. I am not sure whether this is the correct way or whether it affects the results; waiting for the official correction.
def _forward_mlm(words):
    """For each word, fill every template, score the batch with the MLM, and
    return the best-scoring filled phrase plus its mean token probability.

    Assumes module-level globals from generate.py: `templates`, `tokenizer`,
    `model`, `device`, `k`, `PAD_TOKEN_ID` — not visible here; verify there.
    """
    output_phrases = []
    output_probs = []
    for word in words:
        # One phrase per template, scored as a single padded batch.
        input_phrases = [template.format(word) for template in templates]
        inputs = tokenizer(input_phrases, return_tensors="pt", padding=True).to(device)
        input_ids = inputs["input_ids"]
        outputs = model(**inputs)
        probs = torch.softmax(outputs.logits, dim=-1).detach()
        top_k_out = probs.topk(k=k, dim=-1)
        word_probs = []
        word_phrases = []
        for sent_idx in range(probs.shape[0]):
            # NOTE(review): masked positions found via PAD_TOKEN_ID — confirm
            # this is the intended token id (mask vs. pad).
            mask_mask = input_ids[sent_idx] == PAD_TOKEN_ID
            mask_range = torch.arange(mask_mask.sum())
            token_ids = top_k_out.indices[sent_idx, mask_mask]
            token_probs = top_k_out.values[sent_idx, mask_mask]
            # k-column with the highest mean probability across masked slots.
            best_local_idx = token_probs.mean(dim=1).argmax()
            output_ids = torch.clone(input_ids[sent_idx])
            output_ids[mask_mask] = token_ids[mask_range, best_local_idx]
            word_phrases.append(tokenizer.decode(output_ids, skip_special_tokens=True))
            # Calculate the mean probability of the selected tokens
            mean_probability = token_probs[mask_range, best_local_idx].mean().item()  # Now calculating mean first
            word_probs.append(mean_probability)
        # Select the best phrase based on some criteria, here assumed the highest average probability
        best_idx = np.argmax(word_probs)
        output_phrases.append(word_phrases[best_idx])
        output_probs.append(word_probs[best_idx])
    return {"prob": output_probs, "output_phrase": output_phrases}
It works for me. Thanks a lot!
When I run examples/01_intro.ipynb on Google Colab, I get an error in the following cell.
Could you tell me how to fix this problem? Thank you!