import re
import sys
import unicodedata
# Python 2 spells these builtins ``unichr``/``xrange``; Python 3 only provides
# ``chr``/``range``.  Pick whichever pair exists so the module imports on both.
try:
    _chr, _range = unichr, xrange
except NameError:
    _chr, _range = chr, range


def _char_class_body(chars):
    """Collapse *chars* into the body of a regex character class.

    *chars* must be ordered by ascending code point.  Runs of consecutive
    code points are emitted as a single ``lo-hi`` range instead of one literal
    per character, which keeps the compiled pattern small and fast to match.
    """
    spans = []
    first = last = None
    for ch in chars:
        cp = ord(ch)
        if last is not None and cp == last + 1:
            last = cp  # still inside the current run of consecutive code points
            continue
        if first is not None:
            spans.append((first, last))
        first = last = cp
    if first is not None:
        spans.append((first, last))
    return ''.join(
        re.escape(_chr(lo)) if lo == hi
        else re.escape(_chr(lo)) + '-' + re.escape(_chr(hi))
        for lo, hi in spans
    )


# Every code point the interpreter can represent.  sys.maxunicode is 0x10FFFF
# on Python 3 and wide Python 2 builds, but 0xFFFF on narrow Python 2 builds,
# where asking unichr() for anything larger raises ValueError.
all_chars = (_chr(i) for i in _range(sys.maxunicode + 1))
# All characters whose Unicode general category starts with 'C', in ascending
# code-point order.
control_chars = ''.join(c for c in all_chars if unicodedata.category(c)[0] == 'C')
# Matches exactly one character from control_chars.
cc_re = re.compile('[%s]' % _char_class_body(control_chars))
def rm_control_chars(s):
    """Return *s* with every character matched by ``cc_re`` removed.

    ``cc_re`` is the module-level pattern compiled from ``control_chars``,
    i.e. the characters whose Unicode general category starts with ``C``.
    See http://www.unicode.org/reports/tr44/#General_Category_Values
    """
    return cc_re.sub('', s)
# Invisible characters are important too =)
# Source: https://pastebin.com/1aR1ivaR