oxidecomputer / typify

compiler from JSON Schema into idiomatic Rust types
Apache License 2.0
401 stars 57 forks source link

"anyOf" with "required" doesn't appear to be generating the right enum variants #669

Open mlieberman85 opened 2 weeks ago

mlieberman85 commented 2 weeks ago

I am running against v0.1.0.

I have a type definition in the JSON schema called ResourceDescriptor. It should require at least one of the content, digest, or uri fields to be set. However, what I'm seeing is that the generated code makes them mutually exclusive.

Here's my json schema:

{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "title": "SummarySCAI",
  "type": "object",
  "properties": {
    "_type": {
      "type": "string"
    },
    "subject": {
      "type": "array",
      "items": {
        "$ref": "#/$defs/ResourceDescriptor"
      }
    },
    "predicateType": {
      "type": "string"
    },
    "predicate": {
      "type": "object",
      "properties": {
        "attributes": {
          "type": "array",
          "items": {
            "type": "object",
            "properties": {
              "attribute": {
                "type": "string",
                "enum": [
                  "PASSED_DEVELOPMENT_ENVIRONMENT",
                  "PASSED_SOURCE",
                  "PASSED_BUILD",
                  "PASSED_PACKAGE",
                  "PASSED_DEPLOY"
                ]
              },
              "conditions": {
                "type": "object",
                "properties": {
                  "policy": {
                    "type": "string"
                  }
                }
              },
              "evidence": {
                "$ref": "#/$defs/ResourceDescriptor"
              }
            },
            "required": ["attribute", "evidence"]
          }
        },
        "producer": {
          "$ref": "#/$defs/ResourceDescriptor"
        }
      },
      "required": ["attributes", "producer"]
    }
  },
  "required": ["_type", "subject", "predicateType", "predicate"],
  "$defs": {
    "ResourceDescriptor": {
      "type": "object",
      "properties": {
        "name": {
          "type": "string"
        },
        "uri": {
          "type": "string"
        },
        "digest": {
          "type": "object",
          "properties": {
            "sha256": {
              "type": "string"
            }
          },
          "required": ["sha256"]
        },
        "content": {
          "type": "string"
        },
        "downloadLocation": {
          "type": "string"
        },
        "mediaType": {
          "type": "string"
        },
        "annotations": {
          "type": "object",
          "additionalProperties": true
        }
      },
      "anyOf": [
        {
          "required": ["uri"]
        },
        {
          "required": ["digest"]
        },
        {
          "required": ["content"]
        }
      ],
      "additionalProperties": false
    }
  }
}

It generates an enum like:

pub enum ResourceDescriptor {
    Variant0 {
        #[serde(default, skip_serializing_if = "serde_json::Map::is_empty")]
        annotations: serde_json::Map<String, serde_json::Value>,
        #[serde(
            rename = "downloadLocation",
            default,
            skip_serializing_if = "Option::is_none"
        )]
        download_location: Option<String>,
        #[serde(rename = "mediaType", default, skip_serializing_if = "Option::is_none")]
        media_type: Option<String>,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        name: Option<String>,
        uri: String,
    },
    Variant1 {
        #[serde(default, skip_serializing_if = "serde_json::Map::is_empty")]
        annotations: serde_json::Map<String, serde_json::Value>,
        digest: ResourceDescriptorVariant1Digest,
        #[serde(
            rename = "downloadLocation",
            default,
            skip_serializing_if = "Option::is_none"
        )]
        download_location: Option<String>,
        #[serde(rename = "mediaType", default, skip_serializing_if = "Option::is_none")]
        media_type: Option<String>,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        name: Option<String>,
    },
    Variant2 {
        #[serde(default, skip_serializing_if = "serde_json::Map::is_empty")]
        annotations: serde_json::Map<String, serde_json::Value>,
        content: String,
        #[serde(
            rename = "downloadLocation",
            default,
            skip_serializing_if = "Option::is_none"
        )]
        download_location: Option<String>,
        #[serde(rename = "mediaType", default, skip_serializing_if = "Option::is_none")]
        media_type: Option<String>,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        name: Option<String>,
    },
}

I would expect the enum variants to look more like:

pub enum ResourceDescriptor {
    Variant0 {
        #[serde(default, skip_serializing_if = "serde_json::Map::is_empty")]
        annotations: serde_json::Map<String, serde_json::Value>,
        content: Option<String>,
        digest: Option<ResourceDescriptorVariant1Digest>,
        #[serde(
            rename = "downloadLocation",
            default,
            skip_serializing_if = "Option::is_none"
        )]
        download_location: Option<String>,
        #[serde(rename = "mediaType", default, skip_serializing_if = "Option::is_none")]
        media_type: Option<String>,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        name: Option<String>,
        uri: String,
    },
    Variant1 {
        #[serde(default, skip_serializing_if = "serde_json::Map::is_empty")]
        annotations: serde_json::Map<String, serde_json::Value>,
        content: Option<String>,
        digest: ResourceDescriptorVariant1Digest,
        #[serde(
            rename = "downloadLocation",
            default,
            skip_serializing_if = "Option::is_none"
        )]
        download_location: Option<String>,
        #[serde(rename = "mediaType", default, skip_serializing_if = "Option::is_none")]
        media_type: Option<String>,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        name: Option<String>,
        uri: Option<String>,
    },
    Variant2 {
        #[serde(default, skip_serializing_if = "serde_json::Map::is_empty")]
        annotations: serde_json::Map<String, serde_json::Value>,
        content: String,
        digest: Option<ResourceDescriptorVariant1Digest>,
        #[serde(
            rename = "downloadLocation",
            default,
            skip_serializing_if = "Option::is_none"
        )]
        download_location: Option<String>,
        #[serde(rename = "mediaType", default, skip_serializing_if = "Option::is_none")]
        media_type: Option<String>,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        name: Option<String>,
        uri: Option<String>,
    },
}

If I run this schema against other validators, it works as intended: as long as at least one of content, digest, or uri is set, the document validates.

Here is an example JSON document that does not validate; it fails with the error: Error: data did not match any variant of untagged enum ResourceDescriptor at line 20 column 7

{
  "_type": "https://in-toto.io/Statement/v1",
  "subject": [
    {
      "name": "example-software-artifact",
      "digest": { "sha256": "a1b2c3d4e5f6..." }
    }
  ],
  "predicateType": "https://in-toto.io/attestation/scai/attribute-report/v0.2",
  "predicate": {
    "attributes": [
      {
        "attribute": "PASSED_DEVELOPMENT_ENVIRONMENT",
        "evidence": {
          "name": "a1b2c3d4e5f6.development.jsonl",
          "uri": "https://example.com/scai/a1b2c3d4e5f6.development.jsonl",
          "digest": { "sha256": "d1e2f3a4b5c6..." },
          "mediaType": "application/x.dsse+json"
        }
      },
      {
        "attribute": "PASSED_SOURCE",
        "evidence": {
          "name": "a1b2c3d4e5f6.source.jsonl",
          "uri": "https://example.com/scai/a1b2c3d4e5f6.source.jsonl",
          "digest": { "sha256": "e2f3a4b5c6d1..." },
          "mediaType": "application/x.dsse+json"
        }
      },
      {
        "attribute": "PASSED_BUILD",
        "evidence": {
          "name": "a1b2c3d4e5f6.build.jsonl",
          "uri": "https://example.com/scai/a1b2c3d4e5f6.build.jsonl",
          "digest": { "sha256": "f3a4b5c6d1e2..." },
          "mediaType": "application/x.dsse+json"
        }
      },
      {
        "attribute": "PASSED_PACKAGE",
        "evidence": {
          "name": "a1b2c3d4e5f6.package.jsonl",
          "uri": "https://example.com/scai/a1b2c3d4e5f6.package.jsonl",
          "digest": { "sha256": "a4b5c6d1e2f3..." },
          "mediaType": "application/x.dsse+json"
        }
      },
      {
        "attribute": "PASSED_DEPLOY",
        "evidence": {
          "name": "a1b2c3d4e5f6.deploy.jsonl",
          "uri": "https://example.com/scai/a1b2c3d4e5f6.deploy.jsonl",
          "digest": { "sha256": "b5c6d1e2f3a4..." },
          "mediaType": "application/x.dsse+json"
        }
      }
    ],
    "producer": {
      "uri": "https://example.com/gatekeeping-attestor",
      "name": "Gatekeeping Attestor",
      "digest": {
        "sha256": "0123456789abcdef..."
      }
    }
  }
}
PranavKumar-15032001 commented 1 week ago

Adding to the above case:

{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "definitions": {
    "AmfCond": {
      "description": "Subscription to a set of AMFs, based on AMF Set Id and/or AMF Region Id",
      "type": "object",
      "anyOf": [
        {
          "required": ["amfSetId"]
        },
        {
          "required": ["amfRegionId"]
        }
      ],
      "properties": {
        "amfRegionId": {
          "$ref": "#/definitions/AmfRegionId"
        },
        "amfSetId": {
          "$ref": "#/definitions/AmfSetId"
        }
      }
    },
    "AmfRegionId": {
      "description": "String identifying the AMF Set ID (10 bits) as specified in clause 2.10.1 of 3GPP TS 23.003.  It is encoded as a string  of 3 hexadecimal characters where the first character is limited to values 0 to 3 (i.e. 10 bits)\n",
      "type": "string",
      "pattern": "^[A-Fa-f0-9]{2}$"
    },
    "AmfSetId": {
      "description": "String identifying the AMF Set ID (10 bits) as specified in clause 2.10.1 of 3GPP TS 23.003.  It is encoded as a string of 3 hexadecimal characters where the first character is limited to values 0 to 3 (i.e. 10 bits).\n",
      "type": "string",
      "pattern": "^[0-3][A-Fa-f0-9]{2}$"
    }
  }
}

In the above JSON schema, the anyOf is generated as an enum, even though a valid document can have both fields present. The generated Rust code is:

pub enum AmfCond {
    Variant0 {
        #[serde(rename = "amfSetId")]
        amf_set_id: AmfSetId,
    },
    Variant1 {
        #[serde(rename = "amfRegionId")]
        amf_region_id: AmfRegionId,
    },
}

?