python-attrs / cattrs

Composable custom class converters for attrs, dataclasses and friends.
https://catt.rs
MIT License
779 stars 108 forks source link

How to recursively unstructure with hooks? #520

Open kkg-else42 opened 3 months ago

kkg-else42 commented 3 months ago

Hey there,

I have put together this small example to illustrate my issue. Context is JSON serialization.

There is an outer class Frame, whose data field is a UnionType (later configured as a tagged union). Some fields have None as their default value -- those should not appear in the resulting JSON string when they are None:

import attrs
from cattrs.gen import make_dict_unstructure_fn
from cattrs.preconf.json import JsonConverter, make_converter
from cattrs.strategies import configure_tagged_union
from types import UnionType
from typing import get_origin, get_args

@attrs.frozen
class A:
    """Frozen attrs class; one of the payload variants of the ``FrameData`` union."""
    to_keep: str = 'foo'
    # Optional field defaulting to None; it should be left out of the JSON when None.
    to_skip: str|None = None

@attrs.frozen
class B:
    """Frozen attrs class; one of the payload variants of the ``FrameData`` union."""
    to_keep: str = 'bar'
    # Optional field defaulting to None; it should be left out of the JSON when None.
    to_skip: str|None = None

# Union of the payload types a Frame can carry (plain dict, A or B).
FrameData = dict | A | B

@attrs.frozen
class Frame:
    """Frozen attrs class wrapping a ``FrameData`` payload."""
    data: FrameData
    # Optional field defaulting to None; it should be left out of the JSON when None.
    to_skip: str|None = None

Then there are two helpers and a hook factory:

def _contains_nonetype(type_) -> bool:
    if get_origin(type_) is UnionType:
        return type(None) in get_args(type_)
    return type_ is type(None)

def _is_attrs_with_none_defaults(type_: type) -> bool:
    """Predicate: is ``type_`` an attrs class with a None-defaulted optional field?

    Args:
        type_: The type to inspect.

    Returns:
        True when ``attrs.has(type_)`` holds and at least one attribute has a
        type accepted by ``_contains_nonetype`` together with a default of
        ``None``; False otherwise.
    """
    if not attrs.has(type_):
        return False
    for attribute in attrs.fields(type_):
        if _contains_nonetype(attribute.type) and attribute.default is None:
            return True
    return False

def _get_unstructure_without_nones(cls):
    """Build an unstructure hook for ``cls`` that drops None-valued optional fields.

    The base hook is generated with ``make_dict_unstructure_fn`` against the
    module-level ``conv`` converter. Only attributes whose type contains
    ``NoneType`` and whose default is ``None`` are candidates for removal.

    Args:
        cls: The attrs class to build the hook for.

    Returns:
        A callable taking an instance and returning its unstructured dict,
        with the candidate keys removed wherever their value is ``None``.
    """
    base_hook = make_dict_unstructure_fn(cl=cls, converter=conv)
    droppable = [
        attribute.name
        for attribute in attrs.fields(cls)
        if _contains_nonetype(attribute.type) and attribute.default is None
    ]

    def unstructure_without_nones(obj):
        result = base_hook(obj)
        for name in droppable:
            # Indexing (not .get) on purpose: the key is expected to be present.
            if result[name] is None:
                del result[name]
        return result

    return unstructure_without_nones

Then there is the converter, an additional hook (to add something special to the Frame object) and the tagged union definition:

# Converter created by cattrs.preconf.json.make_converter; shared by every hook in this example.
conv = make_converter()
# Attrs classes matching the predicate get their hook from the None-stripping factory.
conv.register_unstructure_hook_factory(predicate=_is_attrs_with_none_defaults, factory=_get_unstructure_without_nones)
# Frame-specific hook: adds the extra 'to_add' key in front of Frame's own fields.
# NOTE(review): it calls conv.unstructure_attrs_asdict directly instead of a hook built by the factory above.
conv.register_unstructure_hook(Frame, lambda obj: {'to_add': 'something special', **conv.unstructure_attrs_asdict(obj)})
# Tagged union for FrameData: tag stored under '_type', generated from the casefolded class name; dict is the default.
configure_tagged_union(union=FrameData, converter=conv, tag_name='_type', tag_generator=lambda t: t.__name__.casefold(), default=dict)

If I serialize the Frame instance:

print(conv.dumps(Frame(data=A())))

I get this:

{"to_add": "something special", "data": {"to_keep": "foo", "_type": "a"}, "to_skip": null}

But what I need is this (with the None-value attribute omitted):

{"to_add": "something special", "data": {"to_keep": "foo", "_type": "a"}}

I am sure it is related to this hook, because it works as expected without it:

conv.register_unstructure_hook(Frame, lambda obj: {'to_add': 'something special', **conv.unstructure_attrs_asdict(obj)})

It would be great if you could help me out again

Tinche commented 3 months ago

Here's the problem:

conv.register_unstructure_hook(
    Frame,
    lambda obj: {"to_add": "something special", **conv.unstructure_attrs_asdict(obj)},
)

If you call conv.unstructure_attrs_asdict directly, the hook produced by your _get_unstructure_without_nones factory won't get called for Frame (unstructure_attrs_asdict will simply do all the work itself).

Try this instead:

# Build the None-stripping hook for Frame once, up front...
base_frame_hook = _get_unstructure_without_nones(Frame)
# ...and reuse it inside the Frame hook, which only adds the extra 'to_add' key.
conv.register_unstructure_hook(
    Frame, lambda obj: {"to_add": "something special", **base_frame_hook(obj)}
)
kkg-else42 commented 3 months ago

Greatly appreciated help at light speed!

What would it look like if the thing is a bit more complex? E.g. multiple hook factories and separate helpers for the hooks instead of an inline lambda definition. I would be reluctant to couple the Frame hook with the hook factories.

Tinche commented 3 months ago

Can you give me a more complex example?

kkg-else42 commented 3 months ago

Let's take this as a starting point:

from datetime import datetime
from functools import cache, singledispatch
from types import UnionType
from typing import Any, Final, get_args, get_origin

import attrs
from cattrs.gen import make_dict_unstructure_fn
from cattrs.preconf.json import JsonConverter, make_converter
from cattrs.strategies import configure_tagged_union

@attrs.frozen
class Sub:
    """Frozen attrs class nested inside ``A`` and ``B`` via their ``sub`` field."""
    foo: str = 'bar'
    # and some more fields (incl. other attrs types)

@attrs.frozen
class A:
    """Frozen attrs class; one of the payload variants of the ``FrameData`` union."""
    to_keep: str = 'foo'
    # Optional field defaulting to None.
    to_skip: str|None = None
    # Default is a single Sub instance created at class-definition time (Sub is frozen).
    sub: Sub = Sub()

@attrs.frozen
class B:
    """Frozen attrs class; one of the payload variants of the ``FrameData`` union."""
    to_keep: str = 'bar'
    # Optional field defaulting to None.
    to_skip: str|None = None
    # Default is a single Sub instance created at class-definition time (Sub is frozen).
    sub: Sub = Sub()

# Union of the payload types a Frame can carry (plain dict, A or B).
FrameData = dict | A | B

@attrs.frozen
class Frame:
    """Frozen attrs class wrapping a ``FrameData`` payload."""
    data: FrameData
    # Optional field defaulting to None.
    to_skip: str|None = None

# Types whose unstructuring is routed through the singledispatch ``_unstructure`` function.
_CUSTOMIZED_UNSTRUCTURE_TYPES: Final[set] = {
    datetime,
    Frame,
    Sub,
    # and some more...
}

# Union types that are configured as tagged unions on the converter.
_TAGGED_UNIONS: Final[set] = {
    FrameData,
    # and some more...
}

# strftime pattern used when unstructuring datetime values.
_TIMESTAMP_FORMAT: Final[str] = '%Y%m%d_%H%M%S'

# Converter created by cattrs.preconf.json.make_converter; shared by every hook in this module.
conv = make_converter()

def _contains_nonetype(type_):
    if get_origin(type_) is UnionType:
        return type(None) in get_args(type_)
    return type_ is type(None)

def _is_attrs_with_none_defaults(type_: type) -> bool:
    """Predicate: is ``type_`` an attrs class with a None-defaulted optional field?

    Args:
        type_: The type to inspect.

    Returns:
        True when ``attrs.has(type_)`` holds and at least one attribute has a
        type accepted by ``_contains_nonetype`` together with a default of
        ``None``; False otherwise.
    """
    if not attrs.has(type_):
        return False
    for attribute in attrs.fields(type_):
        if _contains_nonetype(attribute.type) and attribute.default is None:
            return True
    return False

def _get_unstructure_without_nones(cls):
    """Build an unstructure hook for ``cls`` that drops None-valued optional fields.

    The base hook is generated with ``make_dict_unstructure_fn`` against the
    module-level ``conv`` converter. Only attributes whose type contains
    ``NoneType`` and whose default is ``None`` are candidates for removal.

    Args:
        cls: The attrs class to build the hook for.

    Returns:
        A callable taking an instance and returning its unstructured dict,
        with the candidate keys removed wherever their value is ``None``.
    """
    base_hook = make_dict_unstructure_fn(cl=cls, converter=conv)
    droppable = [
        attribute.name
        for attribute in attrs.fields(cls)
        if _contains_nonetype(attribute.type) and attribute.default is None
    ]

    def unstructure_without_nones(obj):
        result = base_hook(obj)
        for name in droppable:
            # Indexing (not .get) on purpose: the key is expected to be present.
            if result[name] is None:
                del result[name]
        return result

    return unstructure_without_nones

@singledispatch
def _unstructure(obj: Any) -> dict | str:
    raise NotImplementedError(f'Unsupported type: {type(obj)}.')

@_unstructure.register
def _(obj: datetime) -> str:
    """Unstructure a ``datetime`` into a string formatted with ``_TIMESTAMP_FORMAT``."""
    formatted = obj.strftime(_TIMESTAMP_FORMAT)
    return formatted

@cache
def _frame_base_unstructure():
    """Return the None-stripping unstructure hook for ``Frame``, built once.

    The hook is generated on first use and then cached, instead of being
    regenerated for every ``Frame`` that is unstructured. It is still built
    lazily (not at import time), which keeps the original timing: the hook
    is only generated once a ``Frame`` is actually unstructured.
    NOTE(review): hooks registered on ``conv`` after the first ``Frame`` has
    been unstructured are presumably no longer picked up -- confirm this is
    acceptable.
    """
    return _get_unstructure_without_nones(Frame)


@_unstructure.register
def _(obj: Frame) -> dict:
    """Unstructure a ``Frame``: the extra ``'to_add'`` key, then its fields.

    Args:
        obj: The ``Frame`` instance to unstructure.

    Returns:
        A dict starting with ``'to_add'`` followed by the output of the
        None-stripping base hook for ``Frame``.
    """
    return {'to_add': 'something special', **_frame_base_unstructure()(obj)}

@_unstructure.register
def _(obj: Sub) -> dict:
    """Unstructure a ``Sub`` by re-arranging its data and delegating to ``conv``.

    The re-arranged mapping is a placeholder in this example; whatever it
    contains is passed through ``conv.unstructure``.
    """
    rearranged = {
        # re-arrange data structure...
    }
    return conv.unstructure(rearranged)

# Attrs classes with None-defaulted optional fields get the None-stripping hook.
conv.register_unstructure_hook_factory(
    predicate=_is_attrs_with_none_defaults,
    factory=_get_unstructure_without_nones,
)

# Types with bespoke handling are routed through the singledispatch function.
# It is passed directly: wrapping it in a lambda only added a call layer.
for data_type in _CUSTOMIZED_UNSTRUCTURE_TYPES:
    conv.register_unstructure_hook(data_type, _unstructure)

# Each union is tagged under '_type' with the casefolded class name; dict is the default.
for tagged_union in _TAGGED_UNIONS:
    configure_tagged_union(
        union=tagged_union,
        converter=conv,
        tag_name='_type',
        tag_generator=lambda t: t.__name__.casefold(),
        default=dict,
    )

Now I would like to add another hook factory, e.g. the to_camel_case thing from the docs. How could I integrate this? And later on I might need to add another one (and another one...). ;-)

kkg-else42 commented 3 months ago

Hi Tin, is there anything else I should add, or is it just a busy schedule?