SwissDataScienceCenter / calamus

A JSON-LD Serialization Library for Python
Apache License 2.0
29 stars 12 forks source link

rdf_type array of length > 1 breaks fields.Nested deserialization. #73

Closed tripougnif closed 2 years ago

tripougnif commented 2 years ago

This is using the master branch. The following code sample shows that having more than one inherited type extending rdf_type breaks nested fields.

from calamus.schema import JsonLDAnnotation
import calamus.fields as fields
from pyld import jsonld, documentloader
from pprint import pprint
import json

def myloader(*args, **kwargs):
    """Build a document loader that always asks for JSON-LD.

    All positional and keyword arguments are forwarded unchanged to
    ``documentloader.requests.requests_document_loader``.

    Returns:
        A ``loader(url, options)`` callable that sets the ``Accept`` header
        to ``application/ld+json`` in ``options['headers']`` and then
        delegates to the requests-based loader.
    """
    requests_loader = documentloader.requests.requests_document_loader(*args, **kwargs)

    def loader(url, options=None):
        # Use a fresh dict per call instead of a shared mutable default.
        options = {} if options is None else options
        # Create the 'headers' mapping when the caller did not supply one;
        # indexing it directly raised KeyError in that case.
        options.setdefault('headers', {})['Accept'] = 'application/ld+json'
        return requests_loader(url, options)

    return loader
# Register the loader built by myloader() as the jsonld document loader.
jsonld.set_document_loader(myloader())

# Namespace objects for schema.org and ActivityStreams terms.
schema = fields.Namespace("http://schema.org#")
as2 = fields.Namespace("https://www.w3.org/ns/activitystreams#")

# Schema names passed as `nested=` to the `object_` field of Create.
OBJ = ['NoteSchema', 'PageSchema']

# Base class of the reproduction; its Meta declares rdf_type as2.Create.
class Create(metaclass=JsonLDAnnotation):
    _id = fields.Id()
    # Nested field on as2.object; candidate schemas are named in OBJ.
    object_ = fields.Nested(as2.object, nested=OBJ)
    # Mapped to as2.published.
    created_at = fields.DateTime(as2.published, add_value_types=True)

    class Meta:
        rdf_type = as2.Create

# Subclass of Create that declares its own rdf_type (as2.Note).
# NOTE(review): per the schema dump in this report, NoteSchema ends up keyed
# by both the Create and Note types rather than by Note alone.
class Note(Create):
    class Meta:
        rdf_type = as2.Note

    summary = fields.String(as2.summary)

# Subclass of Note that declares rdf_type as2.Page.
# NOTE(review): per the schema dump in this report, PageSchema ends up keyed
# by the Create, Note and Page types together.
class Page(Note):
    class Meta:
        rdf_type = as2.Page

# Payload: a "Create" activity whose "object" is a "Note".
pl = json.loads(b'{"@context": ["https://www.w3.org/ns/activitystreams", {"schema": "http://schema.org#"}], "object": {"type": "Note", "id": "http://example.com/note/1", "summary": "Some other summary", "published": "1970-01-01 00:00Z"}, "type": "Create", "id": "http://example.com/create/1"}')

# This load is the call that raises the KeyError shown in the traceback.
activity = Create.schema().load(pl)
# Dump the loaded object and compact it with the payload's own @context.
pprint(jsonld.compact(activity.dump(), ctx=pl['@context']))

Running this code produces the following traceback:

Traceback (most recent call last):
  File "/home/alain/Code/calamus/case.py", line 42, in <module>
    activity = Create.schema().load(pl)
  File "/home/alain/.cache/pypoetry/virtualenvs/calamus-aUzyu6-E-py3.10/lib/python3.10/site-packages/marshmallow/schema.py", line 717, in load
    return self._do_load(
  File "/home/alain/.cache/pypoetry/virtualenvs/calamus-aUzyu6-E-py3.10/lib/python3.10/site-packages/marshmallow/schema.py", line 852, in _do_load
    result = self._deserialize(
  File "/home/alain/Code/calamus/calamus/schema.py", line 335, in _deserialize
    value = self._call_and_store(
  File "/home/alain/.cache/pypoetry/virtualenvs/calamus-aUzyu6-E-py3.10/lib/python3.10/site-packages/marshmallow/schema.py", line 495, in _call_and_store
    value = getter_func(data)
  File "/home/alain/Code/calamus/calamus/schema.py", line 334, in <lambda>
    getter = lambda val: field_obj.deserialize(val, field_name, data, **d_kwargs)
  File "/home/alain/.cache/pypoetry/virtualenvs/calamus-aUzyu6-E-py3.10/lib/python3.10/site-packages/marshmallow/fields.py", line 364, in deserialize
    output = self._deserialize(value, attr, data, **kwargs)
  File "/home/alain/Code/calamus/calamus/fields.py", line 620, in _deserialize
    return super()._deserialize(value, attr, data, **kwargs)
  File "/home/alain/Code/calamus/calamus/fields.py", line 141, in _deserialize
    return super()._deserialize(value, attr, data, **kwargs)
  File "/home/alain/.cache/pypoetry/virtualenvs/calamus-aUzyu6-E-py3.10/lib/python3.10/site-packages/marshmallow/fields.py", line 660, in _deserialize
    return self._load(value, data, partial=partial)
  File "/home/alain/Code/calamus/calamus/fields.py", line 576, in _load
    valid_data = self.load_single_entry(value, partial)
  File "/home/alain/Code/calamus/calamus/fields.py", line 544, in load_single_entry
    schema = self.schema["from"][str(type_)]
KeyError: "['https://www.w3.org/ns/activitystreams#Note']"

with self._schema["from"] = {"['https://www.w3.org/ns/activitystreams#Create', 'https://www.w3.org/ns/activitystreams#Note']": <NoteSchema(many=False)>, "['https://www.w3.org/ns/activitystreams#Create', 'https://www.w3.org/ns/activitystreams#Note', 'https://www.w3.org/ns/activitystreams#Page']": <PageSchema(many=False)>}

A quick workaround is to make sure only the corresponding type is used as a key, but I'd like to understand the rationale behind setting multiple types for rdf_type.