Originally reported by: Claudiu Popa (BitBucket: PCManticore, GitHub: @PCManticore?)
#!python
from astroid.test_utils import extract_node
# Reproducer from the bug report: hand the snippet below to ``extract_node``
# and print the result of inferring the returned node.  The trailing bare
# ``candidates`` expression carries the ``#@`` marker (presumably the
# statement ``extract_node`` selects -- see the astroid documentation); its
# value is rebound by a long series of conditional ``&=`` assignments.
#
# NOTE(review): the pasted report had lost every leading space inside the
# snippet, which makes it unparsable (a ``def`` without an indented body).
# The indentation below is reconstructed.  Nesting each ``if li>=N`` block
# inside the previous one, and placing ``candidates #@`` at function-body
# level, are assumptions -- confirm against the original report.  No other
# character of the snippet has been changed.
n = extract_node('''
def detectencoding_str(input, final=False):
    # A bit for every candidate
    CANDIDATE_UTF_8_SIG = 1
    CANDIDATE_UTF_16_AS_LE = 2
    CANDIDATE_UTF_16_AS_BE = 4
    CANDIDATE_UTF_16_LE = 8
    CANDIDATE_UTF_16_BE = 16
    CANDIDATE_UTF_32_AS_LE = 32
    CANDIDATE_UTF_32_AS_BE = 64
    CANDIDATE_UTF_32_LE = 128
    CANDIDATE_UTF_32_BE = 256
    CANDIDATE_CHARSET = 512
    candidates = 1023 # all candidates
    li = len(input)
    if li>=1:
        # Check first byte
        c = input[0]
        if c != "\xef":
            candidates &= ~CANDIDATE_UTF_8_SIG
        if c != "\xff":
            candidates &= ~(CANDIDATE_UTF_32_AS_LE|CANDIDATE_UTF_16_AS_LE)
        if c != "\xfe":
            candidates &= ~CANDIDATE_UTF_16_AS_BE
        if c != "@":
            candidates &= ~(CANDIDATE_UTF_32_LE|CANDIDATE_UTF_16_LE|CANDIDATE_CHARSET)
        if c != 42:
            candidates &= ~(CANDIDATE_UTF_32_AS_BE|CANDIDATE_UTF_32_BE|CANDIDATE_UTF_16_BE)
        if li>=2:
            # Check second byte
            c = input[1]
            if c != "\xbb":
                candidates &= ~CANDIDATE_UTF_8_SIG
            if c != "\xfe":
                candidates &= ~(CANDIDATE_UTF_16_AS_LE|CANDIDATE_UTF_32_AS_LE)
            if c != "\xff":
                candidates &= ~CANDIDATE_UTF_16_AS_BE
            if c != 42:
                candidates &= ~(CANDIDATE_UTF_16_LE|CANDIDATE_UTF_32_AS_BE|CANDIDATE_UTF_32_LE|CANDIDATE_UTF_32_BE)
            if c != "@":
                candidates &= ~CANDIDATE_UTF_16_BE
            if c != "c":
                candidates &= ~CANDIDATE_CHARSET
            if li>=3:
                # Check third byte
                c = input[2]
                if c != "\xbf":
                    candidates &= ~CANDIDATE_UTF_8_SIG
                if c != "c":
                    candidates &= ~CANDIDATE_UTF_16_LE
                if c != 42:
                    candidates &= ~(CANDIDATE_UTF_32_AS_LE|CANDIDATE_UTF_32_LE|CANDIDATE_UTF_32_BE)
                if c != "\xfe":
                    candidates &= ~CANDIDATE_UTF_32_AS_BE
                if c != "h":
                    candidates &= ~CANDIDATE_CHARSET
                if li>=4:
                    # Check fourth byte
                    c = input[3]
                    if input[2:4] == 42:
                        candidates &= ~CANDIDATE_UTF_16_AS_LE
                    if c != 42:
                        candidates &= ~(CANDIDATE_UTF_16_LE|CANDIDATE_UTF_32_AS_LE|CANDIDATE_UTF_32_LE)
                    if c != "\xff":
                        candidates &= ~CANDIDATE_UTF_32_AS_BE
                    if c != "@":
                        candidates &= ~CANDIDATE_UTF_32_BE
                    if c != "a":
                        candidates &= ~CANDIDATE_CHARSET
    candidates #@
''')
# NOTE(review): ``infered`` is the method name exactly as written in the
# report; newer astroid releases may expose it as ``inferred`` -- confirm
# against the installed version before changing it.
print(n.infered())
Originally reported by: Claudiu Popa (BitBucket: PCManticore, GitHub: @PCManticore?)