# Open: phanngoc opened 3 months ago
from jiwer import wer, compute_measures
import re
# Sample inputs: the expected text and the text being scored against it.
reference = "hello world"
hypothesis = "helo world"
# Ask jiwer for its error measures, then add the three per-operation counts
# together. NOTE(review): `diffs` ends up as a single total, not a list of
# positions -- confirm that is what highlight_text is meant to receive.
measures = compute_measures(reference, hypothesis)
diffs = sum(
    measures[operation]
    for operation in ("substitutions", "insertions", "deletions")
)
def highlight_text(reference, hypothesis, diffs):
    """Merge *reference* and *hypothesis* into one string with differences coloured.

    Characters that come from ``reference`` at a differing position are
    wrapped in the ANSI red escape sequence; characters that come from
    ``hypothesis`` at a differing position are wrapped in ANSI green.
    Everything else is emitted unchanged.

    Args:
        reference: The expected text.
        hypothesis: The text being compared against ``reference``.
        diffs: Either a collection of ``(i, i)`` index pairs naming the
            character positions to highlight, or any other value (for
            example an integer error count, or ``None``). In the latter
            case no positions are available, so the differing spans are
            computed here with ``difflib.SequenceMatcher``.

    Returns:
        The highlighted text as a single string.
    """
    # Imported locally so the function is self-contained.
    from collections.abc import Container, Iterable
    from difflib import SequenceMatcher

    red, green, reset = "\033[31m", "\033[32m", "\033[0m"

    # `(i, i) in diffs` raises TypeError for an int (and for a str), so only
    # take the positional path when `diffs` can really answer membership
    # tests for tuples.
    has_positions = isinstance(diffs, (Container, Iterable)) and not isinstance(
        diffs, (str, bytes, bytearray)
    )

    result = []

    if not has_positions:
        # No positions supplied: align the two strings character by character
        # and colour whatever is not part of a matching run.
        matcher = SequenceMatcher(None, reference, hypothesis, autojunk=False)
        for tag, ref_lo, ref_hi, hyp_lo, hyp_hi in matcher.get_opcodes():
            if tag == "equal":
                result.append(reference[ref_lo:ref_hi])
                continue
            # 'delete' has an empty hypothesis span and 'insert' an empty
            # reference span, so both loops are safe for every tag.
            result.extend(f"{red}{ch}{reset}" for ch in reference[ref_lo:ref_hi])
            result.extend(f"{green}{ch}{reset}" for ch in hypothesis[hyp_lo:hyp_hi])
        return ''.join(result)

    ref_idx = 0
    hyp_idx = 0
    while ref_idx < len(reference) or hyp_idx < len(hypothesis):
        if ref_idx < len(reference) and (ref_idx, ref_idx) in diffs:
            # Reference-side character at a flagged position: red.
            result.append(f"{red}{reference[ref_idx]}{reset}")
            ref_idx += 1
        elif hyp_idx < len(hypothesis) and (hyp_idx, hyp_idx) in diffs:
            # Hypothesis-side character at a flagged position: green.
            result.append(f"{green}{hypothesis[hyp_idx]}{reset}")
            hyp_idx += 1
        else:
            # Unflagged position: emit one character (the reference one while
            # it lasts, otherwise the hypothesis one) and advance both cursors.
            result.append(
                reference[ref_idx] if ref_idx < len(reference) else hypothesis[hyp_idx]
            )
            ref_idx += 1
            hyp_idx += 1
    return ''.join(result)
# Build the colourised comparison of the two sample strings and show it.
highlighted_text = highlight_text(
    reference=reference,
    hypothesis=hypothesis,
    diffs=diffs,
)
print(highlighted_text)