microsoft / TaskWeaver

A code-first agent framework for seamlessly planning and executing data analytics tasks.
https://microsoft.github.io/TaskWeaver/
MIT License
5.37k stars 688 forks source link

add value mode for JSON parser #372

Closed Jack-Q closed 5 months ago

Jack-Q commented 5 months ago

Add an `include_all_values` option to the streaming JSON parser so that it emits the complete value once an array or map has been fully parsed. The option defaults to false, in which case the parser skips collecting intermediate values while parsing.

For example, consider the following case, which sets the option `include_all_values=True`:

from taskweaver.utils.json_parser import parse_json_stream
import json

for ev in parse_json_stream(json.dumps({
    "str_val": "hello world",
    "int_val": 42,
    "float_val": 3.14,
    "bool_val": True,
    "null_val": None,
    "list_val": [1, 2, 3],
    "dict_val": {"a": 1, "b": 2, "c": 3},
    "nested_val": {
        "a": 1,
        "b": 2,
        "c": {
            "x": 10,
            "y": 20,
            "z": 30
        }
    }
}), include_all_values=True, skip_ws=True):
    if not ev.is_end:
        continue
    if ev.value is None:
        continue
    print(ev)

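The output looks like the following. Note that no value event is printed for `.null_val`, because the loop above skips every event whose `value` is `None`: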

ParserEvent(prefix='', event='map_key', value='str_val', value_str='', is_end=True)
ParserEvent(prefix='.str_val', event='string', value='hello world', value_str='', is_end=True)
ParserEvent(prefix='', event='map_key', value='int_val', value_str='', is_end=True)
ParserEvent(prefix='.int_val', event='number', value=42, value_str='', is_end=True)
ParserEvent(prefix='', event='map_key', value='float_val', value_str='', is_end=True)
ParserEvent(prefix='.float_val', event='number', value=3.14, value_str='', is_end=True)
ParserEvent(prefix='', event='map_key', value='bool_val', value_str='', is_end=True)
ParserEvent(prefix='.bool_val', event='boolean', value=True, value_str='true', is_end=True)
ParserEvent(prefix='', event='map_key', value='null_val', value_str='', is_end=True)
ParserEvent(prefix='', event='map_key', value='list_val', value_str='', is_end=True)
ParserEvent(prefix='.list_val[0]', event='number', value=1, value_str='', is_end=True)
ParserEvent(prefix='.list_val[1]', event='number', value=2, value_str='', is_end=True)
ParserEvent(prefix='.list_val[2]', event='number', value=3, value_str='', is_end=True)
ParserEvent(prefix='.list_val', event='end_array', value=[1, 2, 3], value_str=']', is_end=True)
ParserEvent(prefix='', event='map_key', value='dict_val', value_str='', is_end=True)
ParserEvent(prefix='.dict_val', event='map_key', value='a', value_str='', is_end=True)
ParserEvent(prefix='.dict_val.a', event='number', value=1, value_str='', is_end=True)
ParserEvent(prefix='.dict_val', event='map_key', value='b', value_str='', is_end=True)
ParserEvent(prefix='.dict_val.b', event='number', value=2, value_str='', is_end=True)
ParserEvent(prefix='.dict_val', event='map_key', value='c', value_str='', is_end=True)
ParserEvent(prefix='.dict_val.c', event='number', value=3, value_str='', is_end=True)
ParserEvent(prefix='.dict_val', event='end_map', value={'a': 1, 'b': 2, 'c': 3}, value_str='}', is_end=True)
ParserEvent(prefix='', event='map_key', value='nested_val', value_str='', is_end=True)
ParserEvent(prefix='.nested_val', event='map_key', value='a', value_str='', is_end=True)
ParserEvent(prefix='.nested_val.a', event='number', value=1, value_str='', is_end=True)
ParserEvent(prefix='.nested_val', event='map_key', value='b', value_str='', is_end=True)
ParserEvent(prefix='.nested_val.b', event='number', value=2, value_str='', is_end=True)
ParserEvent(prefix='.nested_val', event='map_key', value='c', value_str='', is_end=True)
ParserEvent(prefix='.nested_val.c', event='map_key', value='x', value_str='', is_end=True)
ParserEvent(prefix='.nested_val.c.x', event='number', value=10, value_str='', is_end=True)
ParserEvent(prefix='.nested_val.c', event='map_key', value='y', value_str='', is_end=True)
ParserEvent(prefix='.nested_val.c.y', event='number', value=20, value_str='', is_end=True)
ParserEvent(prefix='.nested_val.c', event='map_key', value='z', value_str='', is_end=True)
ParserEvent(prefix='.nested_val.c.z', event='number', value=30, value_str='', is_end=True)
ParserEvent(prefix='.nested_val.c', event='end_map', value={'x': 10, 'y': 20, 'z': 30}, value_str='}', is_end=True)
ParserEvent(prefix='.nested_val', event='end_map', value={'a': 1, 'b': 2, 'c': {'x': 10, 'y': 20, 'z': 30}}, value_str='}', is_end=True)
ParserEvent(prefix='', event='end_map', value={'str_val': 'hello world', 'int_val': 42, 'float_val': 3.14, 'bool_val': True, 'null_val': None, 'list_val': [1, 2, 3], 'dict_val': {'a': 1, 'b': 2, 'c': 3}, 'nested_val': {'a': 1, 'b': 2, 'c': {'x': 10, 'y': 20, 'z': 30}}}, value_str='}', is_end=True)