oxidecomputer / typify

compiler from JSON Schema into idiomatic Rust types
Apache License 2.0
401 stars 57 forks source link

Support oneOf together with properties #272

Closed BadMagic100 closed 11 months ago

BadMagic100 commented 1 year ago

When using properties and oneOf together, generation fails. I was using this pattern for a couple use cases:

These can both be worked around fairly trivially (and I think I probably will do that in this case) but these both seem like reasonable cases to handle.

Schema:

{
    "$schema": "https://json-schema.org/draft-07/schema#",
    "$id": "https://badmagic100.github.io/HollowKnight.Packages/hpackage.schema.json",
    "title": "Hollow Knight PackageDef",
    "description": "Package definition schema for Hollow Knight mods and modpacks",
    "type": "object",
    "required": ["name", "description", "repository", "assets"],
    "additionalProperties": false,
    "properties": {
        "name": {
            "type": "object"
        },
        "description": {
            "type": "string"
        },
        "author": {
            "type": "string"
        },
        "repository": {
            "$ref": "#/definitions/Link"
        },
        "assets": {
            "type": "array",
            "items": {
                "$ref": "#/definitions/Asset"
            }
        },
        "dependencies": {
            "$ref": "#/definitions/References"
        },
        "devDependencies": {
            "$ref": "#/definitions/References"
        }
    },
    "definitions": {
        "Link": {
            "type": "string",
            "format": "uri"
        },
        "Asset": {
            "oneOf": [
                {
                    "type": "string"
                },
                {
                    "$ref": "#/definitions/PlatformAsset"
                }
            ]
        },
        "PlatformAsset": {
            "type": "object",
            "required": ["platform", "path"],
            "additionalProperties": false,
            "properties": {
                "platform": {
                    "$ref": "#/definitions/Platform"
                },
                "path": {
                    "type": "string"
                }
            }
        },
        "Platform": {
            "type": "string",
            "enum": ["win32", "macos", "linux"]
        },
        "References": {
            "oneOf": [
                { "type": "array", "items": { "type": "string" }, "$comment": "An array of mod names. Version is inferred to be @modlinks." },
                { "type": "object", "additionalProperties": { "$ref": "#/definitions/StringVersion" }, "$comment": "A map of mod name to version." },
                { "type": "object", "additionalProperties": { "$ref": "#/definitions/ReferenceDef" }, "$comment": "A map of mod name to full version spec." }
            ]
        },
        "ReferenceDef": {
            "type": "object",
            "oneOf": [
                { "$ref": "#/definitions/GitReference" },
                { "$ref": "#/definitions/ModlinksReference" },
                { "$ref": "#/definitions/LinkReference" }
            ],
            "properties": {
                "alternateInstallName": {
                    "type": "string"
                },
                "fileType": {
                    "type": "string",
                    "enum": ["zip", "dll"]
                }
            }
        },
        "StringVersion": {
            "$comment": "A mod version identifier. Versions are processed, in precedence order, as follows:\n* Direct download URL\n* @latest or @modlinks, specifying the latest version in the git repo or the latest version on modlinks respectively\n* A modlinks version number, if that version is present on modlinks.\n* A github release tag",
            "type": "string"
        },
        "GitReference": {
            "type": "object",
            "additionalProperties": false,
            "properties": {
                "tag": {
                    "type": "string"
                },
                "useLatestRelease": {
                    "type": "boolean",
                    "const": true,
                    "$comment": "If this flag is set, tag is ignored."
                },
                "asset": {
                    "type": "string"
                }
            }
        },
        "ModlinksReference": {
            "type": "object",
            "additionalProperties": false,
            "properties": {
                "version": {
                    "type": "string"
                },
                "useLatestPublished": {
                    "type": "boolean",
                    "const": true,
                    "$comment": "If this flag is set, version is ignored."
                }
            }
        },
        "LinkReference": {
            "type": "object",
            "required": ["link"],
            "additionalProperties": false,
            "properties": {
                "link": {
                    "$ref": "#/definitions/Link"
                }
            }
        }
    }
}

cargo typify output:

Message:  not yet implemented: invalid (or unexpected) schema:
SchemaObject {
    metadata: None,
    instance_type: Some(
        Single(
            Object,
        ),
    ),
    format: None,
    enum_values: None,
    const_value: None,
    subschemas: Some(
        SubschemaValidation {
            all_of: None,
            any_of: None,
            one_of: Some(
                [
                    Object(
                        SchemaObject {
                            metadata: None,
                            instance_type: None,
                            format: None,
                            enum_values: None,
                            const_value: None,
                            subschemas: None,
                            number: None,
                            string: None,
                            array: None,
                            object: None,
                            reference: Some(
                                "#/definitions/GitReference",
                            ),
                            extensions: {},
                        },
                    ),
                    Object(
                        SchemaObject {
                            metadata: None,
                            instance_type: None,
                            format: None,
                            enum_values: None,
                            const_value: None,
                            subschemas: None,
                            number: None,
                            string: None,
                            array: None,
                            object: None,
                            reference: Some(
                                "#/definitions/ModlinksReference",
                            ),
                            extensions: {},
                        },
                    ),
                    Object(
                        SchemaObject {
                            metadata: None,
                            instance_type: None,
                            format: None,
                            enum_values: None,
                            const_value: None,
                            subschemas: None,
                            number: None,
                            string: None,
                            array: None,
                            object: None,
                            reference: Some(
                                "#/definitions/LinkReference",
                            ),
                            extensions: {},
                        },
                    ),
                ],
            ),
            not: None,
            if_schema: None,
            then_schema: None,
            else_schema: None,
        },
    ),
    number: None,
    string: None,
    array: None,
    object: Some(
        ObjectValidation {
            max_properties: None,
            min_properties: None,
            required: {},
            properties: {
                "alternateInstallName": Object(
                    SchemaObject {
                        metadata: None,
                        instance_type: Some(
                            Single(
                                String,
                            ),
                        ),
                        format: None,
                        enum_values: None,
                        const_value: None,
                        subschemas: None,
                        number: None,
                        string: None,
                        array: None,
                        object: None,
                        reference: None,
                        extensions: {},
                    },
                ),
                "fileType": Object(
                    SchemaObject {
                        metadata: None,
                        instance_type: Some(
                            Single(
                                String,
                            ),
                        ),
                        format: None,
                        enum_values: Some(
                            [
                                String("zip"),
                                String("dll"),
                            ],
                        ),
                        const_value: None,
                        subschemas: None,
                        number: None,
                        string: None,
                        array: None,
                        object: None,
                        reference: None,
                        extensions: {},
                    },
                ),
            },
            pattern_properties: {},
            additional_properties: None,
            property_names: None,
        },
    ),
    reference: None,
    extensions: {},
}
ahl commented 1 year ago

Regarding the use of the oneOf to require exactly one set of properties to be specified, how would you expect that to be rendered in Rust? I've seen some cases like this before and was thinking about something like this:

struct TypeWithXorProperties {
    some_prop: String,
    one_or_the_other: OneOrTheOther,
}

enum OneOrTheOther {
    One(String),
    TheOther(u32),
}

What do you think?


Regard the latter case:

        "ReferenceDef": {
            "type": "object",
            "oneOf": [
                { "$ref": "#/definitions/GitReference" },
                { "$ref": "#/definitions/ModlinksReference" },
                { "$ref": "#/definitions/LinkReference" }
            ],
            "properties": {
                "alternateInstallName": {
                    "type": "string"
                },
                "fileType": {
                    "type": "string",
                    "enum": ["zip", "dll"]
                }
            }
        },

I could imagine modeling that as something like this:

#[serde(untagged)]
enum ReferenceDef {
    GitReferenceLink {
        #[serde(flatten)]
        inner: GitReferenceLink,
        alternate_install_name: String,
        file_type: ReferenceDefFileType, // enum { Zip, Dll }
    },
    ..
}

That is: each variant would include the original type flattened along with the ancillary data. Alternatively we could do something like this:

enum ReferenceDefInner {
    GitReferenceLink(GitReferenceLink),
    ..
}

struct ReferenceDef {
    #[serde(flatten)]
    inner: ReferenceDefInner,
    alternate_install_name: String,
    file_type: ReferenceDefFileType,
}

That would effectively treat this construct like an implicit allOf: [ { oneOf: [..] }, { properties: {..} } ].

Thoughts?

BadMagic100 commented 1 year ago

For the first case - I think your proposed approach is fine. I've seen some other tools handle this by just ignoring that constraint in the generated code (and presumably leaving it to the user run through an external schema validator, which is something I'm intending to do anyway), but I'll leave the final call up to you. I'm new enough to Rust that I can't really say how necessary an additional validation layer is on top of whatever may be in serde_json.

For the second case, I think I prefer your first proposed approach where ReferenceDef is the enum. In my mental model, that puts the ReferenceDef closer in shape to the actual JSON representation

ahl commented 11 months ago

The output here is now improved... but still not something I think matches the intention of the schema. Consider this:

        "ReferenceDef": {
            "type": "object",
            "oneOf": [
                { "$ref": "#/definitions/GitReference" },
                { "$ref": "#/definitions/ModlinksReference" },
                { "$ref": "#/definitions/LinkReference" }
            ],
            "properties": {
                "alternateInstallName": {
                    "type": "string"
                },
                "fileType": {
                    "type": "string",
                    "enum": ["zip", "dll"]
                }
            }
        },
...
        "GitReference": {
            "type": "object",
            "additionalProperties": false,
            "properties": {
                "tag": {
                    "type": "string"
                },
                "useLatestRelease": {
                    "type": "boolean",
                    "const": true,
                    "$comment": "If this flag is set, tag is ignored."
                },
                "asset": {
                    "type": "string"
                }
            }
        },

Note that GitReference.additionalProperties is false. This means that when combined with the additional properties in the parent schema... those are ignored. See https://json-schema.org/understanding-json-schema/reference/object.html#id6

I'm not sure how best to handle this. I am loth to create some "ignore additionalProperties" flag (at least at this time). I would prefer to have some preprocessor to flag and/or eliminate situations like this (i.e. where merged schemas are getting impinged by the value of additionalProperties. Or you could do a json patch to remove them. In either case, this is what we're currently generating:

#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(untagged, deny_unknown_fields)]
pub enum ReferenceDef {
    Variant0 {
        #[serde(default, skip_serializing_if = "Option::is_none")]
        asset: Option<String>,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        tag: Option<String>,
        #[serde(
            rename = "useLatestRelease",
            default,
            skip_serializing_if = "Option::is_none"
        )]
        use_latest_release: Option<bool>,
    },
    Variant1 {
        #[serde(
            rename = "useLatestPublished",
            default,
            skip_serializing_if = "Option::is_none"
        )]
        use_latest_published: Option<bool>,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        version: Option<String>,
    },
    Variant2 {
        link: Link,
    },
}

I'd prefer that we leave it as the referenced type (rather than expanding them) in the situation (like this) where the merged schema is identical to the referenced schema (i.e. it is unchanged by the merge operation).

ahl commented 11 months ago

Closing this as the initial issue has been (at least somewhat) addressed.