rbosch opened 2 months ago
The metadata in a Pydantic field(...) definition is only metadata: Pydantic will not use these definitions for data validation. If validation has to take place, it would therefore have to be handled from the xsdata code base.
So far I have been unable to find whether there is support for actual field value validation based on this metadata. Looking at min_inclusive, for example, I only find code and tests about correctly interpreting the schema definition and adding the restrictions to the generated class's metadata.
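For illustration, this is roughly the situation with the generated plain-dataclass output (the Temperature class and its restriction values below are made up, not taken from a real schema): the restriction lives only in the field metadata, and nothing rejects a value that violates it.

```python
from dataclasses import dataclass, field
from typing import Optional


@dataclass
class Temperature:
    # min_inclusive / max_inclusive end up in the metadata dict only;
    # neither dataclasses nor Pydantic enforce them.
    value: Optional[int] = field(
        default=None,
        metadata={
            "type": "Element",
            "min_inclusive": 0,
            "max_inclusive": 100,
        },
    )


# Accepted without complaint, even though it violates max_inclusive:
broken = Temperature(value=250)
```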
I would like actual validation support too. I see a few options for this, at three different levels of integration.
I'm curious to see what @tefra thinks about this. Would validation be something that xsdata(-pydantic) could provide, or would that be something to handle from a different package?
My personal feeling is that using Pydantic somewhat dictates transparent translation of restrictions into Pydantic-native validation code, since Pydantic is all about easy and correct validation of our data objects.
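As a rough sketch of what such a translation could look like (assuming Pydantic v2; the to_pydantic_field helper and the mapping below are hypothetical, not something xsdata-pydantic does today), the generator could map restriction metadata onto Pydantic's native Field constraints:

```python
from pydantic import BaseModel, Field


def to_pydantic_field(metadata: dict):
    """Map a subset of xsdata restriction keys onto Pydantic Field arguments."""
    return Field(
        default=None,
        ge=metadata.get("min_inclusive"),
        le=metadata.get("max_inclusive"),
        min_length=metadata.get("min_length"),
        max_length=metadata.get("max_length"),
        pattern=metadata.get("pattern"),
    )


class Price(BaseModel):
    # Roughly equivalent to xs:minInclusive="0" / xs:maxInclusive="1000".
    amount: float | None = to_pydantic_field({"min_inclusive": 0, "max_inclusive": 1000})


Price(amount=1500)  # raises pydantic.ValidationError
```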
Note BTW that this issue is likely a duplicate of https://github.com/tefra/xsdata-pydantic/issues/14.
I switched to using plain dataclasses because of an issue with SOAP calls, and I wrote some validation code based on the metadata in the generated dataclasses. I presume this code can be adapted to Pydantic; the main change would be the bit of code in _validate_dataclass(...) that retrieves the metadata for the validated fields.
Maybe sharing this code can kick off some inspiration.
Disclaimer: this was written to match my acute requirements. "pattern" should apply to numeric fields too, for example, but that isn't built in (yet). There are likely other omissions, and possibly misconceptions.
import re
from dataclasses import fields, is_dataclass
from types import MappingProxyType
from typing import Any

from xsdata.models.datatype import XmlDateTime, XmlDate, XmlTime


def validate_xsdata_dataclass(obj: Any) -> None:
    if not is_dataclass(obj):
        raise ValueError(f"Expected a dataclass, but got {type(obj).__name__!r}")
    _validate_dataclass(obj, path="")


def _validate_dataclass(obj: Any, path: str) -> None:
    for field_info in fields(obj):
        my_value = getattr(obj, field_info.name)
        metadata = field_info.metadata
        my_path = f"{path}.{field_info.name}" if path != "" else field_info.name
        _validate_value(my_value, metadata, my_path)


def _validate_value(obj: Any, metadata: MappingProxyType, path: str) -> None:
    if obj is None:
        _validate_none(metadata, path)
    elif is_dataclass(obj):
        _validate_dataclass(obj, path)
    elif isinstance(obj, XmlDate):
        _validate_xml_date(obj, metadata, path)
    elif isinstance(obj, XmlDateTime):
        _validate_xml_date_time(obj, metadata, path)
    elif isinstance(obj, XmlTime):
        _validate_xml_time(obj, metadata, path)
    elif isinstance(obj, (list, tuple)):
        _validate_collection(obj, metadata, path)
    elif isinstance(obj, str):
        _validate_string(obj, metadata, path)
    elif isinstance(obj, bool):
        _validate_boolean(obj, metadata, path)
    elif isinstance(obj, float):
        _validate_float(obj, metadata, path)
    elif isinstance(obj, int):
        _validate_integer(obj, metadata, path)
    else:
        raise NotImplementedError(f"{path!r} has unhandled value type {type(obj).__name__!r}")


def _validate_none(metadata: MappingProxyType, path: str) -> None:
    if metadata.get("required"):
        raise ValueError(f"{path!r} is required, but its value is None")


def _validate_collection(obj: list[Any] | tuple[Any, ...], metadata: MappingProxyType, path: str) -> None:
    if "min_occurs" in metadata and len(obj) < metadata["min_occurs"]:
        raise ValueError(f"{path!r} must contain at least {metadata['min_occurs']} items, but got {len(obj)}")
    if "max_occurs" in metadata and len(obj) > metadata["max_occurs"]:
        raise ValueError(f"{path!r} must contain at most {metadata['max_occurs']} items, but got {len(obj)}")
    for idx, contained_obj in enumerate(obj):
        my_path = f"{path}[{idx}]"
        _validate_value(contained_obj, metadata, my_path)


def _validate_string(obj: str, metadata: MappingProxyType, path: str) -> None:
    if max_length := metadata.get("max_length"):
        if len(obj) > max_length:
            raise ValueError(f"{path!r} is above the allowed maximum length of {max_length}")
    if min_length := metadata.get("min_length"):
        if len(obj) < min_length:
            raise ValueError(f"{path!r} is below the allowed minimum length of {min_length}")
    if pattern := metadata.get("pattern"):
        regexp = re.compile(f"^({pattern})$")
        if not regexp.match(obj):
            raise ValueError(f"{path!r} does not match required pattern '{pattern}'")


def _validate_integer(obj: int, metadata: MappingProxyType, path: str) -> None:
    _validate_number(obj, metadata, path)


def _validate_float(obj: float, metadata: MappingProxyType, path: str) -> None:
    _validate_number(obj, metadata, path)
    if fraction_digits := metadata.get("fraction_digits"):
        parts = str(obj).split(".")
        if len(parts) == 2:
            f_part = re.sub(r"0+$", "", parts[1])
            if len(f_part) > fraction_digits:
                raise ValueError(f"{path!r} exceeds the maximum of {fraction_digits} fractional digits")


def _validate_number(obj: int | float, metadata: MappingProxyType, path: str) -> None:
    if "min_inclusive" in metadata and obj < metadata["min_inclusive"]:
        raise ValueError(f"{path!r} is below the allowed minimum value of {metadata['min_inclusive']}")
    if "max_inclusive" in metadata and obj > metadata["max_inclusive"]:
        raise ValueError(f"{path!r} exceeds the allowed maximum value of {metadata['max_inclusive']}")
    if total_digits := metadata.get("total_digits"):
        # The number of digits is checked after the value is transformed
        # to its canonical form and leading and trailing zeros are removed.
        # For example 123.456000 -> "123456"
        # Leading zeroes are no concern here, since the input type is numeric.
        parts = str(obj).split(".")
        match parts:
            case [integer]:
                check_value = re.sub(r"\D", "", integer)
            case [integer, fraction]:
                i_part = re.sub(r"\D", "", integer)
                f_part = re.sub(r"0+$", "", fraction)
                check_value = i_part + f_part
            case _:
                raise NotImplementedError
        if len(check_value) > total_digits:
            raise ValueError(f"{path!r} exceeds the maximum of {total_digits} digits")


def _validate_boolean(_obj: bool, _metadata: MappingProxyType, _path: str) -> None:
    pass


def _validate_xml_date(_obj: XmlDate, _metadata: MappingProxyType, _path: str) -> None:
    pass


def _validate_xml_date_time(obj: XmlDateTime, metadata: MappingProxyType, path: str) -> None:
    as_xml_time = XmlTime(obj.hour, obj.minute, obj.second, offset=obj.offset)
    _validate_xml_time(as_xml_time, metadata, path)


def _validate_xml_time(obj: XmlTime, metadata: MappingProxyType, path: str) -> None:
    explicit_timezone = metadata.get("explicit_timezone")
    match explicit_timezone:
        case None:
            pass
        case "optional":
            pass
        case "required":
            if obj.offset is None:
                raise ValueError(f"{path!r} requires a timezone, but has none")
        case "prohibited":
            if obj.offset is not None:
                raise ValueError(f"{path!r} must not have a timezone, but has one")
        case _:
            raise NotImplementedError(f"Unhandled explicit_timezone value: {explicit_timezone}")
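For completeness, usage against a generated dataclass looks roughly like this (the Temperature class below is the same made-up example as above, not generated from a real schema):

```python
from dataclasses import dataclass, field
from typing import Optional


@dataclass
class Temperature:
    value: Optional[int] = field(
        default=None,
        metadata={"required": True, "min_inclusive": 0, "max_inclusive": 100},
    )


validate_xsdata_dataclass(Temperature(value=250))
# ValueError: 'value' exceeds the allowed maximum value of 100
```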
Hi,
I have this xsd fragment:
It creates a model with the following field (metadata):
But the validators are not working.