Open wrefgtzweve opened 4 months ago
Here's a list I've been maintaining (there may be a legitimate use-case for some of these, but I don't really see Thai players on my server, so):
-- Denylist of invisible, zero-width, spacing and text-direction characters.
-- Every element is one UTF-8 encoded string holding a single character.
-- NOTE(review): the consuming code is not visible here; presumably the list is
-- matched against player names - confirm against the caller.
local badCharacters = {}
do
    -- Each spec is an inclusive code point range { first, last }, a single
    -- code point { cp }, or a literal string. Specs are expanded in order, so
    -- the resulting list keeps the ordering shown here.
    local specs = {
        { 0x00A0 },           -- U+00A0: NO-BREAK SPACE
        { 0x0335 },           -- U+0335: COMBINING SHORT STROKE OVERLAY
        { 0x0E3A },           -- U+0E3A: THAI CHARACTER PHINTHU
        { 0x0E38, 0x0E39 },   -- U+0E38..U+0E39: THAI CHARACTER SARA U, SARA UU
        { 0x0E47 },           -- U+0E47: THAI CHARACTER MAITAIKHU
        { 0x115A },           -- U+115A: HANGUL CHOSEONG KIYEOK-TIKEUT
        { 0x115F, 0x1160 },   -- U+115F..U+1160: HANGUL CHOSEONG / JUNGSEONG FILLER
        { 0x180B, 0x180D },   -- U+180B..U+180D: MONGOLIAN FREE VARIATION SELECTOR ONE..THREE
        { 0x2000, 0x200F },   -- U+2000..U+200F: EN QUAD..HAIR SPACE, ZERO WIDTH SPACE /
                              -- NON-JOINER / JOINER, LEFT-TO-RIGHT / RIGHT-TO-LEFT MARK
        { 0x2028, 0x202F },   -- U+2028..U+202F: LINE / PARAGRAPH SEPARATOR, directional
                              -- embeddings and overrides, NARROW NO-BREAK SPACE
        { 0x205F },           -- U+205F: MEDIUM MATHEMATICAL SPACE
        { 0x2060, 0x2064 },   -- U+2060..U+2064: WORD JOINER, FUNCTION APPLICATION,
                              -- INVISIBLE TIMES / SEPARATOR / PLUS
        -- U+2065 is intentionally absent from the list.
        { 0x2066, 0x206F },   -- U+2066..U+206F: LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES
        { 0x3000 },           -- U+3000: IDEOGRAPHIC SPACE
        { 0x3164 },           -- U+3164: HANGUL FILLER
        { 0xFEFF },           -- U+FEFF: ZERO WIDTH NO-BREAK SPACE
        { 0xFFA0 },           -- U+FFA0: HALFWIDTH HANGUL FILLER
        { 0xFFF9, 0xFFFB },   -- U+FFF9..U+FFFB: INTERLINEAR ANNOTATION ANCHOR /
                              -- SEPARATOR / TERMINATOR
        { 0xE0001 },          -- U+E0001: LANGUAGE TAG
        { 0xE0020, 0xE007F }, -- U+E0020..U+E007F: TAG SPACE..TAG TILDE, CANCEL TAG
        "\r",                 -- Carriage return (CR)
        "\n",                 -- Newline (LF)
        "\t",                 -- Tab
    }

    for _, spec in ipairs(specs) do
        if type(spec) == "string" then
            badCharacters[#badCharacters + 1] = spec
        else
            -- A one-element spec denotes a single code point.
            local first, last = spec[1], spec[2] or spec[1]
            for codepoint = first, last do
                badCharacters[#badCharacters + 1] = utf8.char(codepoint)
            end
        end
    end
end
Players are also able to paint the entire server console in different colors if ErrorNoHalt is called with text that contains ANSI escape sequences; this answer on StackOverflow shows a lot of possibilities that people can abuse.
The image below shows how a player changed their nickname to one containing ANSI color sequences, and the console got wrecked.
The following characters allow players to have invisible names. I think there are far more, but these are the ones I've been able to test and verify. A whitelist might be a better approach (though it probably comes with its own issues), or maybe some kind of range block for these characters.
https://unicode-explorer.com/c/200B
https://unicode-explorer.com/c/2029
https://unicode-explorer.com/c/202A https://unicode-explorer.com/c/202B https://unicode-explorer.com/c/202C https://unicode-explorer.com/c/202D https://unicode-explorer.com/c/202E https://unicode-explorer.com/c/2000 https://unicode-explorer.com/c/2001 https://unicode-explorer.com/c/2007 https://unicode-explorer.com/c/2008 https://unicode-explorer.com/c/2002 https://unicode-explorer.com/c/2009 https://unicode-explorer.com/c/2003 https://unicode-explorer.com/c/200A https://unicode-explorer.com/c/2004 https://unicode-explorer.com/c/2005 https://unicode-explorer.com/c/2006 https://unicode-explorer.com/c/2061 https://unicode-explorer.com/c/2062 https://unicode-explorer.com/c/2063 https://unicode-explorer.com/c/2064 https://unicode-explorer.com/c/2066 https://unicode-explorer.com/c/2067 https://unicode-explorer.com/c/2068 https://unicode-explorer.com/c/2069 https://unicode-explorer.com/c/206F
https://unicode-explorer.com/c/200E
https://unicode-explorer.com/c/2028