python-attrs / cattrs

Composable custom class converters for attrs, dataclasses and friends.
https://catt.rs
MIT License
791 stars 110 forks source link

How to deserialize multidict to data model? #407

Open wencan opened 1 year ago

wencan commented 1 year ago

multidict: https://github.com/aio-libs/multidict

import dataclasses
import typing
import cattrs
import multidict

@dataclasses.dataclass
class Model:
    """Example target type: ``a`` is a list of strings, ``b`` a single string."""
    a: typing.List[str]
    b: str

# Case 1: key 'a' is stored twice, key 'b' once.
d = multidict.MultiDict([('a', '111'), ('b', '2'), ('a', '333')])
obj = cattrs.structure(d, Model)
print(obj)

# Case 2: every key is stored exactly once.
d = multidict.MultiDict([('a', '111'), ('b', '2')])
obj = cattrs.structure(d, Model)
print(obj)

want:

Model(a=['111', '333'], b='2')
Model(a=['111'], b='2')

actual:

Model(a=['1', '1', '1'], b='2')
Model(a=['1', '1', '1'], b='2')
Tinche commented 1 year ago

That's not surprising: MultiDict[key] returns only the first value for the key.

The simplest solution is to preprocess the multidict into a normal dictionary with potential lists for values.

import dataclasses
import typing

import multidict

import cattrs

@dataclasses.dataclass
class Model:
    """Example target type: ``a`` is a list of strings, ``b`` a single string."""
    a: typing.List[str]
    b: str

def md_to_dict(d: multidict.MultiDict) -> dict:
    """Flatten a multidict into a plain ``dict``.

    A key stored more than once maps to the list of all its values; a key
    stored exactly once maps to that value itself (it is *not* wrapped in a
    list).
    """
    flattened = {}
    for key in d:
        values = d.getall(key)
        if len(values) > 1:
            flattened[key] = values
        else:
            flattened[key] = values[0]
    return flattened

# Flatten the multidict first, then structure the resulting plain dict.
d = multidict.MultiDict([("a", "111"), ("b", "2"), ("a", "333")])
obj = cattrs.structure(md_to_dict(d), Model)
print(obj)
wencan commented 1 year ago

@Tinche My current solution is the same as yours. Please try the following example:

# Same flow, but 'a' is stored only once, so md_to_dict hands over the bare
# string "111" rather than a one-element list.
d = multidict.MultiDict([("a", "111"), ("b", "2")])
obj = cattrs.structure(md_to_dict(d), Model)
print(obj)

It will output:

Model(a=['1', '1', '1'], b='2')
Tinche commented 1 year ago

Ah yeah, I see.

The issue is that "111" is actually a valid list[str], because in Python strings are sequences of strings (characters). You could change the validation, but it wouldn't help you: it would just raise an error instead of doing what you want.

This is doable but tricky. Let me think about it.

wencan commented 1 year ago

@Tinche

It seems register_structure_hook_factory can solve this problem. Please review my code:

import dataclasses
import datetime
import typing
import functools
import cattrs
import cattrs.gen
import multidict

@dataclasses.dataclass
class Model:
    """Example model whose tuple fields are filled from repeated keys.

    ``a``, ``b`` and ``d`` are variable-length homogeneous tuples; ``c`` is a
    single string. ``d`` is optional and defaults to a one-element tuple
    holding the current time.
    """

    a: tuple[str, ...]  # or typing.Tuple[str, ...]
    b: tuple[int, ...]
    c: str
    # The default is a one-element tuple so that it matches the annotation
    # and has the same shape as a value structured from input.
    d: tuple[datetime.datetime, ...] = dataclasses.field(
        default_factory=lambda: (datetime.datetime.now(),))

# Dedicated converter; datetime values are structured from strings via
# datetime.fromisoformat.
converter = cattrs.Converter()
converter.register_structure_hook(
    datetime.datetime, lambda value, _: datetime.datetime.fromisoformat(value))

def make_multidict_structure_fn(cls):
    """Build a structure hook for a homogeneous tuple type such as ``tuple[int, ...]``.

    The returned hook accepts either a ``list`` of raw values (a key that was
    stored several times) or a single raw value (a key that was stored once)
    and always returns a tuple whose items were structured to the first type
    argument of ``cls`` using the module-level ``converter``.

    Only the first type argument is consulted, so heterogeneous fixed-length
    tuples (e.g. ``tuple[int, str]``) are not handled correctly by this hook.
    """
    # Resolve the element type once, when the hook is built, rather than on
    # every structuring call.
    item_type = typing.get_args(cls)[0]

    def structure(data, _):
        if isinstance(data, list):
            return tuple(converter.structure(item, item_type) for item in data)
        # A lone value is wrapped so the field is always a tuple.
        return (converter.structure(data, item_type),)

    return structure

# Route every parametrized ``tuple`` type through the multidict-aware hook.
converter.register_structure_hook_factory(
    lambda cls: typing.get_origin(cls) is tuple,
    make_multidict_structure_fn,
)

def multidict_to_dict(d: multidict.MultiDict) -> dict:
    """Convert a multidict to a plain ``dict``.

    Keys stored several times map to the list of their values; keys stored
    once map to the single value itself.
    """
    result = {}
    for name in d:
        all_values = d.getall(name)
        result[name] = all_values if len(all_values) > 1 else all_values[0]
    return result

# Case 1: every key is stored once, including 'd'.
d = multidict.MultiDict(
    [('a', '111'), ('b', '222'), ('c', 'abc'), ('d', '2023-08-14T08:54:35.055481+00:00')])
obj = converter.structure(multidict_to_dict(d), Model)
print(obj)  # Model(a=('111',), b=(222,), c='abc', d=(datetime.datetime(2023, 8, 14, 8, 54, 35, 55481, tzinfo=datetime.timezone.utc),))

# Case 2: 'a' is stored twice and 'd' is omitted, so the field default applies.
d = multidict.MultiDict(
    [('a', '111'), ('a', '333'), ('b', '222'), ('c', 'abc')])
obj = converter.structure(multidict_to_dict(d), Model)
print(obj)  # Model(a=('111', '333'), b=(222,), c='abc', d=(...,))

# Case 3: 'a', 'b' and 'd' are each stored twice, in interleaved order.
d = multidict.MultiDict(
    [('a', '111'), ('b', '222'), ('b', '555'), ('a', '333'), ('c', 'abc'), ('d', '2023-08-14T08:54:35.055481+00:00'), ('d', '2023-08-16T08:54:35.055481+00:00')])
obj = converter.structure(multidict_to_dict(d), Model)
print(obj)  # Model(a=('111', '333'), b=(222, 555), c='abc', d=(datetime.datetime(2023, 8, 14, 8, 54, 35, 55481, tzinfo=datetime.timezone.utc), datetime.datetime(2023, 8, 16, 8, 54, 35, 55481, tzinfo=datetime.timezone.utc)))
Tinche commented 1 year ago

I see you're overriding hooks for tuples, and sure, that's a fine approach.

For my approach, I tried something different: I overrode the hooks for dataclasses.

Here's your example with my approach (I added some type hints for my convenience):

import dataclasses
import datetime
import typing
from collections.abc import Callable

import multidict

import cattrs
import cattrs.gen

@dataclasses.dataclass
class Model:
    """Example model whose tuple fields are filled from repeated keys.

    ``a``, ``b`` and ``d`` are variable-length homogeneous tuples; ``c`` is a
    single string. ``d`` is optional and defaults to a one-element tuple
    holding the current time.
    """

    a: tuple[str, ...]  # or typing.Tuple[str, ...]
    b: tuple[int, ...]
    c: str
    # The default is a one-element tuple so that it matches the annotation
    # and has the same shape as a value structured from input.
    d: tuple[datetime.datetime, ...] = dataclasses.field(
        default_factory=lambda: (datetime.datetime.now(),)
    )

# Dedicated converter; datetime values are structured from strings via
# datetime.fromisoformat.
converter = cattrs.Converter()
converter.register_structure_hook(
    datetime.datetime, lambda value, _: datetime.datetime.fromisoformat(value)
)

def dataclass_hook_factory(
    cl: type,
) -> Callable[[multidict.MultiDict, typing.Any], typing.Any]:
    """Build a structure hook that feeds a multidict into dataclass ``cl``.

    Fields of ``cl`` annotated as a parametrized ``tuple`` receive every value
    stored under their key (via ``getall``); all other fields receive a single
    value. The reshaped plain dict is then passed to the structure function
    that ``cattrs.gen.make_dict_structure_fn`` generates for ``cl`` with the
    module-level ``converter``.
    """
    # A frozenset gives constant-time membership tests inside the item loop.
    seq_fields = frozenset(
        f.name for f in dataclasses.fields(cl) if typing.get_origin(f.type) is tuple
    )

    orig_fn = cattrs.gen.make_dict_structure_fn(cl, converter)

    def structure_dc(val: multidict.MultiDict, type_: typing.Any):
        mapped_val = {}
        for k, v in val.items():
            if k not in seq_fields:
                # Plain assignment: if a non-sequence key is repeated, the
                # last value seen during iteration is the one kept.
                mapped_val[k] = v
            elif k not in mapped_val:
                # Gather all values once, on the key's first occurrence,
                # instead of calling getall again for every duplicate.
                mapped_val[k] = val.getall(k)
        return orig_fn(mapped_val, type_)

    return structure_dc

# Use the multidict-aware hook for every type that dataclasses.is_dataclass
# accepts.
converter.register_structure_hook_factory(
    dataclasses.is_dataclass, dataclass_hook_factory
)

## Tests

# Case 1: every key is stored once, including 'd'. The multidict is passed
# to the converter directly, without flattening it first.
d = multidict.MultiDict(
    [
        ("a", "111"),
        ("b", "222"),
        ("c", "abc"),
        ("d", "2023-08-14T08:54:35.055481+00:00"),
    ]
)
obj = converter.structure(d, Model)
print(obj)

# Case 2: 'a' is stored twice and 'd' is omitted, so the field default applies.
d = multidict.MultiDict([("a", "111"), ("a", "333"), ("b", "222"), ("c", "abc")])
obj = converter.structure(d, Model)
print(obj)  # Model(a=('111', '333'), b=(222,), c='abc', d=(...,))

# Case 3: 'a', 'b' and 'd' are each stored twice, in interleaved order.
d = multidict.MultiDict(
    [
        ("a", "111"),
        ("b", "222"),
        ("b", "555"),
        ("a", "333"),
        ("c", "abc"),
        ("d", "2023-08-14T08:54:35.055481+00:00"),
        ("d", "2023-08-16T08:54:35.055481+00:00"),
    ]
)
obj = converter.structure(d, Model)
print(obj)

One nice thing about my approach is that it can be configured to handle lists too, I believe.