Closed mcleantom closed 5 months ago
I think the error is coming from the ModelProxySerializer
class in the deserialize
function on the line:
if (sub_element := element.pop_element(self._element_name, self._search_mode)) is not None:
Where self._element_name
is bar
and this line returns None
, however if you modify it to:
>>> sub_element = element.pop_element('{http://www.cirm.org/RTZ/1/0}bar', self._search_mode)
>>> sub_element
<pydantic_xml.element.native.lxml.XmlElement object at 0x7f8eb5ca6ca0>
You get an object back and the rest of the code works:
>>> sourcemap[loc] = sub_element.get_sourceline()
>>> is_element_nill(sub_element)
False
>>> self._model.__xml_serializer__.deserialize(
... sub_element, context=context, sourcemap=sourcemap, loc=loc,
... )
Bar(y=10)
The value of self._element_name
is being set in the initializer of ModelProxySerializer
on the line:
self._element_name = QName.from_alias(tag=name, ns=ns, nsmap=nsmap).uri
And the nsmap
is not being passed into the Bar
class:
>>> nsmap
ChainMap({})
>>> name
'bar'
Still trying to work out why this is, and a fix for it
Still trying to work it out but a cleaner unit test for the bug is:
def test_deeply_nested_model_list():
class TestSubSubModel(BaseXmlModel, tag="subsubmodel"):
attr1: int = attr()
attr2: int = attr()
class TestSubModel(BaseXmlModel, tag="submodel"):
subsubmodels: list[TestSubSubModel] = element(default_factory=list)
class TestModel(
BaseXmlModel,
tag="model",
nsmap={"": "http://test1.org"},
):
submodels: TestSubModel = element()
xml = """
<model xmlns="http://test1.org">
<submodel>
<subsubmodel attr1="1" attr2="2"/>
<subsubmodel attr1="3" attr2="4"/>
</submodel>
</model>
"""
expected_obj = TestModel(
submodels=TestSubModel(
subsubmodels=[
TestSubSubModel(attr1=1, attr2=2),
TestSubSubModel(attr1=3, attr2=4),
]
)
)
actual_obj = TestModel.from_xml(xml)
assert actual_obj == expected_obj
This bug is happening because the ModelProxySerializer
class is using the first initialized instance of the TestSubModel
serializer on the line:
return self._model.__xml_serializer__.deserialize(
sub_element, context=context, sourcemap=sourcemap, loc=loc,
)
And this version of the serializer doesnt have the new namespaces from the parent context.
I can fix this bug for this test if I modify the ModelProxySerializer
class to be:
class ModelProxySerializer(BaseModelSerializer):
@classmethod
def from_core_schema(cls, schema: pcs.ModelSchema, ctx: Serializer.Context) -> 'ModelProxySerializer':
model_cls = schema['cls']
assert issubclass(model_cls, pxml.BaseXmlModel), "unexpected model type"
name = ctx.entity_path or model_cls.__xml_tag__ or ctx.field_alias or ctx.field_name or model_cls.__name__
ns = select_ns(ctx.entity_ns, model_cls.__xml_ns__, ctx.parent_ns)
nsmap = merge_nsmaps(ctx.entity_nsmap, model_cls.__xml_nsmap__, ctx.parent_nsmap)
search_mode = ctx.search_mode
computed = ctx.field_computed
nillable = ctx.nillable
serializer = ModelSerializer.from_core_schema(schema, ctx)
return cls(model_cls, name, ns, nsmap, search_mode, computed, nillable, serializer)
def __init__(
self,
model: Type['pxml.BaseXmlModel'],
name: str,
ns: Optional[str],
nsmap: Optional[NsMap],
search_mode: SearchMode,
computed: bool,
nillable: bool,
serializer: ModelSerializer,
):
self._model = model
self._element_name = QName.from_alias(tag=name, ns=ns, nsmap=nsmap).uri
self._nsmap = nsmap
self._search_mode = search_mode
self._computed = computed
self._nillable = nillable
self._serializer = serializer
@property
def model(self) -> Type['pxml.BaseXmlModel']:
return self._model
@property
def model_serializer(self) -> Optional[BaseModelSerializer]:
return self._model.__xml_serializer__
@property
def element_name(self) -> str:
return self._element_name
@property
def nsmap(self) -> Optional[NsMap]:
return self._nsmap
def serialize(
self,
element: XmlElementWriter,
value: 'pxml.BaseXmlModel',
encoded: Dict[str, Any],
*,
skip_empty: bool = False,
) -> Optional[XmlElementWriter]:
assert self._model.__xml_serializer__ is not None, f"model {self._model.__name__} is partially initialized"
if self._nillable and value is None:
sub_element = element.make_element(self._element_name, nsmap=self._nsmap)
make_element_nill(sub_element)
element.append_element(sub_element)
return sub_element
if value is None:
return None
sub_element = element.make_element(self._element_name, nsmap=self._nsmap)
self._model.__xml_serializer__.serialize(sub_element, value, encoded, skip_empty=skip_empty)
if skip_empty and sub_element.is_empty():
return None
else:
element.append_element(sub_element)
return sub_element
def deserialize(
self,
element: Optional[XmlElementReader],
*,
context: Optional[Dict[str, Any]],
sourcemap: Dict[Location, int],
loc: Location,
) -> Optional['pxml.BaseXmlModel']:
assert self._model.__xml_serializer__ is not None, f"model {self._model.__name__} is partially initialized"
if self._computed:
return None
if element is None:
return None
if (sub_element := element.pop_element(self._element_name, self._search_mode)) is not None:
sourcemap[loc] = sub_element.get_sourceline()
if is_element_nill(sub_element):
return None
else:
return self._serializer.deserialize(
sub_element, context=context, sourcemap=sourcemap, loc=loc,
)
else:
return None
Where in from_core_schema
I have built a new ModelSerializer from scratch which uses the new ctx
which includes the parent namespaces. I then use this new serialization object in the return statement in the deserialize
function.
However, this breaks a bunch of other unit tests.
A few of the tests can be fixed by either making a ModelSerializer
or a RootModelSerializer
like so:
if schema['schema']['type'] == 'model-fields':
serializer = ModelSerializer.from_core_schema(schema, ctx)
else:
serializer = RootModelSerializer.from_core_schema(schema, ctx)
However, this breaks unit tests with self-referencing models such as test_misc.py -> test_self_ref_models
.
Without any changes to the code, a self referencing model within another model with a namespace map also breaks:
def test_self_ref_models_with_namespace_map_on_root():
class TestSubModel(BaseXmlModel, tag='submodel'):
attr1: int = attr()
element1: float = element()
model1: Optional['TestSubModel'] = element(tag='model1', default=None)
models1: Optional[List['TestSubModel']] = element(tag='item1', default=None)
models2: Optional[Tuple['TestSubModel', 'TestSubModel']] = element(tag='item2', default=None)
class TestModel(BaseXmlModel, tag="model", nsmap={'': 'https://wwww.test.org'}):
submodel: TestSubModel = element(tag='submodel')
obj = TestModel(
submodel=TestSubModel(
attr1=1,
element1=1.1,
model1=TestSubModel(
attr1=2,
element1=2.2,
),
models1=[
TestSubModel(
attr1=3,
element1=3.3,
),
TestSubModel(
attr1=4,
element1=4.4,
),
],
models2=[
TestSubModel(
attr1=5,
element1=5.5,
),
TestSubModel(
attr1=6,
element1=6.6,
),
],
)
)
returned_obj = TestModel.from_xml(obj.to_xml(skip_empty=True))
assert returned_obj == obj
This bug can also be fixed by passing the namespace all the way down the deserialize
path:
def deserialize(
self,
element: Optional[XmlElementReader],
*,
context: Optional[Dict[str, Any]],
sourcemap: Dict[Location, int],
loc: Location,
nsmap: NsMap # <-- New parameter
) -> Optional['pxml.BaseXmlModel']:
And adding a new method to BaseModelSerializer
:
@staticmethod
def get_element_name(tag: str, nsmap: NsMap) -> str:
return QName.from_alias(tag=tag, ns=None, nsmap=nsmap).uri
And then replacing ModelProxySerializer.deserialize
to be:
def deserialize(
self,
element: Optional[XmlElementReader],
*,
context: Optional[Dict[str, Any]],
sourcemap: Dict[Location, int],
loc: Location,
nsmap: NsMap
) -> Optional['pxml.BaseXmlModel']:
assert self._model.__xml_serializer__ is not None, f"model {self._model.__name__} is partially initialized"
if self._computed:
return None
if element is None:
return None
if (sub_element := element.pop_element(self.get_element_name(self._tag, nsmap), self._search_mode)) is not None:
sourcemap[loc] = sub_element.get_sourceline()
if is_element_nill(sub_element):
return None
else:
return self._model.__xml_serializer__.deserialize(
sub_element, context=context, sourcemap=sourcemap, loc=loc, nsmap=nsmap
)
else:
return None
But again, this breaks other unit tests, so maybe not the solution?
Actually, doing this passes all tests including my new tests:
def deserialize(
self,
element: Optional[XmlElementReader],
*,
context: Optional[Dict[str, Any]],
sourcemap: Dict[Location, int],
loc: Location,
nsmap: NsMap
) -> Optional['pxml.BaseXmlModel']:
assert self._model.__xml_serializer__ is not None, f"model {self._model.__name__} is partially initialized"
if self._computed:
return None
if element is None:
return None
sub_element = element.pop_element(self._element_name, self._search_mode)
if sub_element is None:
sub_element = element.pop_element(self.get_element_name(self._tag, nsmap), self._search_mode)
if sub_element is not None:
sourcemap[loc] = sub_element.get_sourceline()
if is_element_nill(sub_element):
return None
else:
return self._model.__xml_serializer__.deserialize(
sub_element, context=context, sourcemap=sourcemap, loc=loc, nsmap=nsmap
)
else:
return None
Not sure if it is a good solution. @dapper91 thoughts?#
edit: Actually, doesnt pass my test for self referencing models with a namespace on the root element
@mcleantom Hi.
Sub-models don't inherit the namespace map from the parent one. So in your example Bars
sub-model doesn't inherit the namespace map (therefore the http://www.cirm.org/RTZ/1/0
namespace) from Baz
. If you define it explicitly you example should work correctly:
>>> from pydantic_xml import BaseXmlModel, attr, element
>>> from typing import Literal
>>>
>>> NSMAP = {
... '': "http://www.cirm.org/RTZ/1/0",
... 'xsi': "http://www.w3.org/2001/XMLSchema-instance",
... }
>>>
>>>
>>> class Foo(BaseXmlModel, tag="foo"):
... x: int = attr(default=1)
...
>>>
>>> class Bar(BaseXmlModel, tag="bar"):
... y: int = attr(default=2)
...
>>>
>>> class Bars(BaseXmlModel, tag="bars", nsmap=NSMAP):
... bars: list[Bar] = element(default_factory=list)
...
>>>
>>> class Baz(BaseXmlModel, tag="route", nsmap=NSMAP):
... version: Literal["1.0"] = attr()
... foo: Foo = element(default_factory=Foo)
... bars: Bars = element(default_factory=Bars)
...
>>> bar = Baz(
... version="1.0",
... foo=Foo(x=1),
... bars=Bars(bars=[Bar(y=1), Bar(y=2)])
... )
>>> print(bar.to_xml(pretty_print=True, skip_empty=True).decode("utf-8"))
<route xmlns="http://www.cirm.org/RTZ/1/0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" version="1.0">
<foo x="1"/>
<bars>
<bar y="1"/>
<bar y="2"/>
</bars>
</route>
>>> print(Baz.from_xml(bar.to_xml(skip_empty=True)))
version='1.0' foo=Foo(x=1) bars=Bars(bars=[Bar(y=1), Bar(y=2)])
When I add a default namespace to my class, it causes serializing list elements in a nested
BaseXmlModel
to fail.An example of this is:
This outputs:
For
Baz
and for
Qux
it outputs:So in the second version, it has correctly serialized the
Bars
class to beBars(bars=[Bar(y=1), Bar(y=2)])
whereas in the first version, it has incorrectly serialized it to beBars(bars=[])
. The only difference between the two classes is thensmap
parameter.How can I add the default namespace parameter, and get
Baz
to serialize the string correctly?Thanks