selfboot / AnnotatedShadowSocks

Annotated shadowsocks(python version)
Other
3 stars 1 forks source link

why need _decode_dict #4

Open selfboot opened 7 years ago

selfboot commented 7 years ago

In utils.py, there is a function _decode_dict(data), which is passed as the object_hook to json.loads in https://github.com/xuelangZF/AnnotatedShadowSocks/blob/master/shadowsocks/utils.py#L126 .

def _decode_list(data):
    rv = []
    for item in data:
        if hasattr(item, 'encode'):
            item = item.encode('utf-8')
        elif isinstance(item, list):
            item = _decode_list(item)
        elif isinstance(item, dict):
            item = _decode_dict(item)
        rv.append(item)
    return rv

def _decode_dict(data):
    rv = {}
    for key, value in data.items():
        if hasattr(value, 'encode'):
            value = value.encode('utf-8')
        elif isinstance(value, list):
            value = _decode_list(value)
        elif isinstance(value, dict):
            value = _decode_dict(value)
        rv[key] = value
    return rv

It is used to convert every string value in data to the str type (encoded as UTF-8).

More details: by default, json.loads performs the following translations when deserializing s to a Python object.

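[Screenshot, 2017-05-15: the JSON-to-Python conversion table from the Python 2 json module documentation — object → dict, array → list, string → unicode, number (int) → int/long, number (real) → float, true → True, false → False, null → None.]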

As we can see, a string in JSON is translated to the unicode type. So _decode_dict recursively encodes all the unicode values (the keys are left untouched) to str using UTF-8.

selfboot commented 7 years ago

Create 4_hook.py to see the difference between using the _decode_dict hook and not using it.

#!/usr/bin/python
# -*- coding: utf-8 -*-

from __future__ import print_function
import json

# Sample configuration, written as a unicode JSON document. The "comments"
# list mixes a non-ASCII string with integers.
json_content = u"""
{
    "server":"45.76.222.198",
    "server_port":8888,
    "local_address": "127.0.0.1",
    "local_port":1080,
    "password":"self boot",
    "timeout":300,
    "method":"aes-256-cfb",
    "comments": ["中文内容", 1, 2]
}
"""

def _decode_list(data):
    rv = []
    for item in data:
        if hasattr(item, 'encode'):
            item = item.encode('utf-8')
        elif isinstance(item, list):
            item = _decode_list(item)
        elif isinstance(item, dict):
            item = _decode_dict(item)
        rv.append(item)
    return rv

def _decode_dict(data):
    rv = {}
    for key, value in data.items():
        if hasattr(value, 'encode'):
            value = value.encode('utf-8')
        elif isinstance(value, list):
            value = _decode_list(value)
        elif isinstance(value, dict):
            value = _decode_dict(value)
        rv[key] = value
    return rv

if __name__ == "__main__":
    # Case 1 - without object_hook: json.loads is called with its defaults
    # on the unicode document, and the decoded result is printed as-is.
    config = json.loads(json_content)
    print(config)

    # Case 2 - with object_hook: _decode_dict is passed as object_hook so
    # that it post-processes the decoded JSON objects before printing.
    config_2 = json.loads(json_content, object_hook=_decode_dict)
    print(config_2)

    # Case 3 - the JSON content is encoded to UTF-8 before loading, and no
    # object_hook is used.
    config_3 = json.loads(json_content.encode("utf-8"))
    print(config_3)

Result of running python 4_hook.py:

{u'server_port': 8888, u'local_port': 1080, u'comments': [u'\u4e2d\u6587\u5185\u5bb9', 1, 2], u'server': u'45.76.222.198', u'timeout': 300, u'local_address': u'127.0.0.1', u'password': u'selfboot', u'method': u'aes-256-cfb'}
{u'server_port': 8888, u'local_port': 1080, u'comments': ['\xe4\xb8\xad\xe6\x96\x87\xe5\x86\x85\xe5\xae\xb9', 1, 2], u'server': '45.76.222.198', u'timeout': 300, u'local_address': '127.0.0.1', u'password': 'selfboot', u'method': 'aes-256-cfb'}
{u'server_port': 8888, u'local_port': 1080, u'comments': [u'\u4e2d\u6587\u5185\u5bb9', 1, 2], u'server': u'45.76.222.198', u'timeout': 300, u'local_address': u'127.0.0.1', u'password': u'selfboot', u'method': u'aes-256-cfb'}