marshmallow-code / marshmallow

A lightweight library for converting complex objects to and from simple Python datatypes.
https://marshmallow.readthedocs.io/
MIT License
7.01k stars 627 forks source link

Dynamically set data_key in nested field? #2305

Open Diego-Hernandez-Moodys opened 3 weeks ago

Diego-Hernandez-Moodys commented 3 weeks ago

Hi,

I'd like to load a list of objects that are simple

{
  "id": str,
  "body": str
}

but keep this flexible as users have their own ways of naming ids and text fields. I've solved this by overwriting the init by passing in a mapping. If there's a better way to do this, let me know.

Demo

# Schema

class Bodies(marshmallow.Schema):
    id=marshmallow.fields.String(required=True, allow_none=False)
    body=marshmallow.fields.String(required=True, allow_none=False, validate=marshmallow.validate.Length(min=1))

    class Meta:
        unknown = marshmallow.EXCLUDE # ignore any unknown fields (specific to my use case)

    def __init__(self, *args, **kwargs):
        """
        User can pass the mapping={ "id": alt_id_name, "body": alt_body_name }
        """
        mapping = kwargs.pop('mapping', {})

        super().__init__(*args, **kwargs)

        # Update fields based on the provided mapping
        for field_name, data_key in mapping.items():
            if field_name in self.fields:
                self.fields[field_name].data_key = data_key
            else:
                raise ValueError(f"Field '{field_name}' not found in the schema.")

# Dynamically setting the data_key

data = {
"bodies": [
    {
        "article_id": "123", 
         "article_body": "Hello, World!", 
         "extra_field_to_ignore": "value"
    },
    {
        "article_id": "456", 
        "article_body": "Goodbye, World!",
        "extra_field_to_ignore": "value"
    }
]
# user provided
"id_field_name": "article_id",
"body_field_name": "article_body"
}

schema = Bodies(mapping={"id": "article_id", "body": "article_body"})
schema.load(data["bodies"][0])

The problem I have is how to properly use this in a parent schema. (If this is a polymorphism issue, feel free to ignore and close.)

class FullSchemaData(marshmallow.Schema):
    """
    """
    bodies = marshmallow.fields.List(
        marshmallow.fields.Nested(Bodies(
            ??? <------------- define a mapping from user-defined fields
        ), required=True),
        required=True,
        allow_none=False,
        validate=marshmallow.validate.Length(min=1, max=25)
    )

    # Optional
    id_field_name = marshmallow.fields.String(
        required=False,
        allow_none=False,
        validate=marshmallow.validate.Length(min=1),
        load_default="id"
    )
    body_field_name = marshmallow.fields.String(
        required=False,
        allow_none=False,
        validate=marshmallow.validate.Length(min=1),
        load_default="body"
    )

I tried using a pre_load on the FullSchemaData to try and override the field, but I do get an error

# FullSchemaData
    @marshmallow.pre_load
    def adjust_dynamic_body_schema(self, data, **kwargs):
        # Extract the optional fields or use default values
        id_field_name = data.get('id_field_name', 'id')
        body_field_name = data.get('body_field_name', 'body')
        mapping={"id": id_field_name, "body": body_field_name}

        # Dynamically set the mapping for the nested schema
        self.fields['bodies'] = marshmallow.fields.List(
            marshmallow.fields.Nested(BodiesSimilarSentenceClusterIds(mapping=mapping), required=True),
            required=True,
            allow_none=False,
            validate=marshmallow.validate.Length(min=1, max=25)
        )
        return data

# Loading
s = FullSchemaData()
s.load(data)
>>>
{'bodies': {0: {'id': ['Missing data for required field.'], 'body': ['Missing data for required field.']}, 1: {'id': ['Missing data for required field.'], 'body': ['Missing data for required field.']}}}

Does Marshmallow have the tools the handle this use case?