tefra / xsdata

Naive XML & JSON Bindings for python
https://xsdata.readthedocs.io
MIT License
310 stars 56 forks source link

Errors lxmlserializer #1024

Closed skinkie closed 3 months ago

skinkie commented 3 months ago

Download https://data.ndovloket.nl/netex/wsf/NeTEx_WSF_WSF_20240415_20240415.xml.gz in /tmp.

Notice:

  1. Pretty printing is not respected.
  2. Attributes are no longer placed on the elements: got <OperationalContextRef></OperationalContextRef>, expected <OperationalContextRef ref="..."/>.
from xsdata.formats.dataclass.context import XmlContext
from xsdata.formats.dataclass.parsers import XmlParser
from xsdata.formats.dataclass.parsers.config import ParserConfig
from xsdata.formats.dataclass.parsers.handlers import LxmlEventHandler, lxml

from xsdata.formats.dataclass.serializers import LxmlTreeSerializer
from xsdata.formats.dataclass.serializers.config import SerializerConfig

from netex import ServiceFrame

# Reproduction script: bind a ServiceFrame element to its dataclass, render it
# back to an lxml element with LxmlTreeSerializer, and splice that element into
# a freshly parsed copy of the same document.
context = XmlContext()
config = ParserConfig(fail_on_unknown_properties=False)
parser = XmlParser(context=context, config=config, handler=LxmlEventHandler)

tree = lxml.etree.parse("/tmp/NeTEx_WSF_WSF_20240415_20240415.xml.gz")

# Annotation only: the name stays unbound unless the loop below runs at least once.
service_frame: ServiceFrame

# Every match overwrites service_frame, so the last ServiceFrame found is kept.
for element in tree.iterfind(".//{http://www.netex.org.uk/netex}ServiceFrame"):
    service_frame = parser.parse(element, ServiceFrame)

serializer_config = SerializerConfig(ignore_default_attributes=True)
serializer_config.pretty_print = True
# NOTE(review): repeats the value already passed to the constructor above.
serializer_config.ignore_default_attributes = True
# The serializer shares the XmlContext used by the parser.
lxml_serializer = LxmlTreeSerializer(context, serializer_config)

# The same file is parsed a second time, so the replacement below operates on a
# new tree object rather than the one that was iterated above.
tree = lxml.etree.parse("/tmp/NeTEx_WSF_WSF_20240415_20240415.xml.gz")

# NOTE(review): find() yields a single element (the first match, per lxml's
# API), whereas service_frame holds the last match from the loop — confirm the
# document has only one ServiceFrame.
element = tree.find(".//{http://www.netex.org.uk/netex}ServiceFrame")
element.getparent().replace(element, lxml_serializer.render(service_frame))

# Write the patched document; pretty printing is requested at write time here.
tree.write("/tmp/test.xml", pretty_print=True, strip_text=True)
tefra commented 3 months ago

@skinkie please keep your samples simple for me,

import lxml
from dataclasses import field, dataclass

from xsdata.formats.dataclass.serializers import LxmlTreeSerializer

@dataclass
class Foo:
    """Minimal model with one field tagged "Attribute" and one tagged "Text"."""

    # Field whose metadata marks it as type "Attribute".
    attr: int = field(
        metadata={"type": "Attribute"},
    )
    # Field whose metadata marks it as type "Text".
    value: str = field(
        metadata={"type": "Text"},
    )

# Render a Foo instance to an lxml element and print it pretty-printed.
foo = Foo(attr=1, value="foo")
rendered = LxmlTreeSerializer().render(foo)

xml_bytes = lxml.etree.tostring(rendered, pretty_print=True)
print(xml_bytes.decode())
<Foo>foo</Foo>
tefra commented 3 months ago

We completely missed the mark there; the serializer config's pretty_print option is irrelevant to the tree serializer.

skinkie commented 3 months ago

I don't think it is irrelevant. If the replaced part of the tree is the only part that isn't pretty-printed, then the serializer clearly has something to do with the pretty printing; it is not handled at the lxml.etree.write step.

tefra commented 3 months ago

Please open a new issue, with a simple example for me :)

skinkie commented 3 months ago

Simple examples work without issues. Replicating it with the given example is the problem.

from typing import List

from lxml import etree

from tests.fixtures.books import BookForm, BooksForm
from tests.fixtures.books.fixtures import books
from xsdata.formats.dataclass.context import XmlContext
from xsdata.formats.dataclass.parsers import XmlParser
from xsdata.formats.dataclass.parsers.config import ParserConfig
from xsdata.formats.dataclass.parsers.handlers import LxmlEventHandler
from xsdata.formats.dataclass.serializers import LxmlTreeSerializer

# Render the books fixture to an lxml element and save it as pretty-printed XML.
serializer = LxmlTreeSerializer()
result = serializer.render(books)

# Use a context manager so the handle is flushed and closed before the file is
# parsed again below, instead of leaving that to garbage collection.
with open("/tmp/test.xml", "wb") as xml_file:
    xml_file.write(etree.tostring(result, pretty_print=True))

# Re-read the file with blank text removed and write it out again.
parser = etree.XMLParser(remove_blank_text=True)
tree = etree.parse("/tmp/test.xml", parser)
tree.write("/tmp/test2.xml", pretty_print=True)

context = XmlContext()
config = ParserConfig(fail_on_unknown_properties=False)
xml_parser = XmlParser(context=context, config=config, handler=LxmlEventHandler)

# Bind every <book> element to BookForm and overwrite its id.
books_list: List[BookForm] = []
for book_xml in tree.iterfind(".//book"):
    book: BookForm = xml_parser.parse(book_xml, BookForm)
    book.id = "XXX"
    books_list.append(book)

lxml_serializer = LxmlTreeSerializer(context)

# A fresh tree is parsed for the replacement; the author's note: "Because my
# XmlParser, does not clear the elements".
tree3 = etree.parse("/tmp/test.xml", parser)
# Swap the single <book> element returned by find() for the re-rendered first book.
element = tree3.find(".//book")
element.getparent().replace(element, lxml_serializer.render(books_list[0]))
tree3.write("/tmp/test3.xml", pretty_print=True, strip_text=True)
from typing import List

from lxml import etree

from tests.fixtures.artists import metadata, AliasList
from xsdata.formats.dataclass.context import XmlContext
from xsdata.formats.dataclass.parsers import XmlParser
from xsdata.formats.dataclass.parsers.config import ParserConfig
from xsdata.formats.dataclass.parsers.handlers import LxmlEventHandler
from xsdata.formats.dataclass.serializers import LxmlTreeSerializer

# Bind every <alias> element of the artist fixture, overwrite its value, wrap
# the results in a new AliasList and splice the rendered list into the tree.
tree = etree.parse(
    "tests/fixtures/artists/art001.xml",
    etree.XMLParser(remove_blank_text=True),
)

context = XmlContext()
xml_parser = XmlParser(
    context=context,
    config=ParserConfig(fail_on_unknown_properties=False),
    handler=LxmlEventHandler,
)

aliases: List[metadata.Alias] = []
for node in tree.iterfind(".//{http://musicbrainz.org/ns/mmd-2.0#}alias"):
    parsed: metadata.Alias = xml_parser.parse(node, metadata.Alias)
    parsed.value = "XXX"
    aliases.append(parsed)

alias_list = AliasList(count=len(aliases), alias=aliases)
lxml_serializer = LxmlTreeSerializer(context)

# The replacement is done on the same tree that was iterated above.
target = tree.find(".//{http://musicbrainz.org/ns/mmd-2.0#}alias-list")
parent = target.getparent()
parent.replace(target, lxml_serializer.render(alias_list))

tree.write("/tmp/alias-list.xml", pretty_print=True, strip_text=True)
from lxml import etree

from tests.fixtures.artists import metadata, AliasList
from xsdata.formats.dataclass.context import XmlContext
from xsdata.formats.dataclass.parsers import XmlParser
from xsdata.formats.dataclass.parsers.config import ParserConfig
from xsdata.formats.dataclass.parsers.handlers import LxmlEventHandler
from xsdata.formats.dataclass.serializers import LxmlTreeSerializer

# Bind the fixture's <alias-list> element to its dataclass, render it again
# and put the rendered element in place of the original one.
tree = etree.parse(
    "tests/fixtures/artists/art001.xml",
    etree.XMLParser(remove_blank_text=True),
)

context = XmlContext()
xml_parser = XmlParser(
    context=context,
    config=ParserConfig(fail_on_unknown_properties=False),
    handler=LxmlEventHandler,
)

# Stays None when nothing matches; otherwise the last match is kept.
alias_list = None
for node in tree.iterfind(".//{http://musicbrainz.org/ns/mmd-2.0#}alias-list"):
    alias_list: metadata.AliasList = xml_parser.parse(node, metadata.AliasList)

lxml_serializer = LxmlTreeSerializer(context)

target = tree.find(".//{http://musicbrainz.org/ns/mmd-2.0#}alias-list")
parent = target.getparent()
parent.replace(target, lxml_serializer.render(alias_list))

tree.write("/tmp/alias-list.xml", pretty_print=True, strip_text=True)