In utils.py, document the actions performed in the normalizeString function (lines 22-33, see here):
def normalizeString(s, uni2ascii=False, lowercase=False, strip=False, only_latin_letters=False, prefix_suffix=("|", "|")):
    """Normalize *s* and wrap it in a prefix and a suffix marker.

    Each step is optional and the enabled steps always run in this order:

    1. ``uni2ascii``: convert *s* to ``str`` and apply Unicode NFKD
       normalization. This decomposes compatibility and accented
       characters (a ligature becomes its letters, an accented letter
       becomes base letter + combining mark). It does NOT remove anything,
       so despite the flag's name the result may still contain non-ASCII
       characters.
    2. ``lowercase``: lower-case the string.
    3. ``strip``: remove leading and trailing whitespace.
    4. ``only_latin_letters``: put a space in front of every ``.``, ``!``
       and ``?``, then replace every run of characters other than ASCII
       letters and those three punctuation marks with a single space.
       Because this happens after ``strip``, the result can again start or
       end with a space.
    5. Always: prepend ``prefix_suffix[0]`` and append ``prefix_suffix[1]``.

    Args:
        s: Text to normalize. It is only converted to ``str`` when
            ``uni2ascii`` is set; otherwise it must already be a string.
        uni2ascii: Enable step 1.
        lowercase: Enable step 2.
        strip: Enable step 3.
        only_latin_letters: Enable step 4.
        prefix_suffix: Two-item sequence ``(prefix, suffix)`` of strings
            placed around the normalized text.

    Returns:
        The normalized string surrounded by the prefix and the suffix.
    """
    # The default is a tuple rather than a list so that no mutable object is
    # shared between calls; callers may still pass a list, it is only indexed.
    if uni2ascii:
        s = unicodedata.normalize('NFKD', str(s))
    if lowercase:
        s = s.lower()
    if strip:
        s = s.strip()
    if only_latin_letters:
        # Detach sentence punctuation from the preceding word first ...
        s = re.sub(r"([.!?])", r" \1", s)
        # ... then squash everything that is not a letter or that punctuation.
        s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return prefix_suffix[0] + s + prefix_suffix[1]
In utils.py, document the actions in the normalizeString function (lines 22-33, see here):