tafia / quick-xml

Rust high performance xml reader and writer
MIT License
1.22k stars 238 forks source link

Duplicate field in standard BPMN XML #791

Closed snoop244 closed 3 months ago

snoop244 commented 3 months ago

I don't work with a lot of XML, so I'm not sure if Business Process Modeling Notation breaks some rules, but I doubt it.

In my toy project I pass it a small BPMN file and ask it to parse the file into my data structures. One type of element, `<bpmn:sequenceFlow>`, shows up multiple times, like many other BPMN elements, but for some reason it generates the duplicate field error while the other repeated elements do not. What is unique about this type of element in BPMN XML is that it is self-closing. Other than that, I can't explain why I get Custom { field: "duplicate field `sequenceFlow`" }

If I remove the other instances of this tag, leaving only 1 instance, it works as expected. If I have duplicates of any other element, they work as expected.

# Manifest of the toy project used to reproduce the reported error.
[package]
name = "bpmn_parser"
version = "0.1.0"
edition = "2021"

[dependencies]
# quick-xml with two optional features switched on; see the crate's feature
# documentation for what `serialize` and `overlapped-lists` enable.
quick-xml = { version = "0.36.1", features = ["serialize", "overlapped-lists"] }
serde = { version = "1.0", features = ["derive"] }
serde_derive = "1.0"
# NOTE(review): main.rs imports `from_reader` from this crate, not from quick-xml.
serde-xml-rs = "0.6.0"

main.rs:

mod bpmn;

use std::fs::File;
use std::io::Read;
use serde_xml_rs::from_reader;
use bpmn::Definitions;

/// Loads `bpmn.xml` from the working directory, deserializes it into
/// [`Definitions`] and pretty-prints the result.
///
/// # Panics
///
/// Panics with the corresponding message when the file cannot be opened,
/// cannot be read as UTF-8 text, or cannot be deserialized.
fn main() {
    // Pull the whole document into memory as a string.
    let mut source = File::open("bpmn.xml").expect("Unable to open BPMN XML file");
    let mut contents = String::new();
    source
        .read_to_string(&mut contents)
        .expect("Unable to read BPMN XML file");

    // `from_reader` is the function imported at the top of main.rs
    // (`serde_xml_rs::from_reader` in this listing).
    let parsed: Definitions =
        from_reader(contents.as_bytes()).expect("Failed to parse BPMN XML");

    // Dump the deserialized structure using the pretty `Debug` format.
    println!("{:#?}", parsed);
}

my types in bpmn.rs:

use serde::Deserialize;

/// Root of the deserialized document: the processes found under the
/// top-level `definitions` element of the sample XML.
#[derive(Debug, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct Definitions {
    /// Every `process` entry; `default` yields an empty list when none is present.
    #[serde(rename = "process", default)]
    pub processes: Vec<Process>,
}

/// One `process` entry, with its children collected into one list per
/// element name.
///
/// Every list field carries `default`, so a missing element name yields an
/// empty list instead of an error.
#[derive(Debug, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct Process {
    /// Mapped from `id`, which the sample XML writes as an attribute.
    #[serde(rename = "id")]
    pub id: String,
    /// Mapped from `isExecutable`, also written as an attribute in the sample XML.
    #[serde(rename = "isExecutable")]
    pub is_executable: bool,
    /// All `startEvent` entries.
    #[serde(rename = "startEvent", default)]
    pub start_events: Vec<StartEvent>,
    /// All `sequenceFlow` entries; this is the field named in the reported
    /// "duplicate field" error.
    #[serde(rename = "sequenceFlow", default)]
    pub sequence_flows: Vec<SequenceFlow>,
    /// All `scriptTask` entries.
    #[serde(rename = "scriptTask", default)]
    pub script_tasks: Vec<ScriptTask>,
    /// All `userTask` entries.
    #[serde(rename = "userTask", default)]
    pub user_tasks: Vec<UserTask>,
    /// All `serviceTask` entries.
    #[serde(rename = "serviceTask", default)]
    pub service_tasks: Vec<ServiceTask>,
    /// All `exclusiveGateway` entries.
    #[serde(rename = "exclusiveGateway", default)]
    pub exclusive_gateways: Vec<ExclusiveGateway>,
    /// All `endEvent` entries.
    #[serde(rename = "endEvent", default)]
    pub end_events: Vec<EndEvent>,
}

/// One `startEvent` entry of a process.
#[derive(Debug, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct StartEvent {
    /// Mapped from `id`, which the sample XML writes as an attribute.
    #[serde(rename = "id")]
    pub id: String,
    /// Text of each `outgoing` child; empty when there is none.
    #[serde(default)]
    pub outgoing: Vec<String>,
}

/// One `sequenceFlow` entry of a process.
///
/// In the sample XML these elements are self-closing and carry all three
/// values as attributes.
#[derive(Debug, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct SequenceFlow {
    /// Mapped from `id`.
    #[serde(rename = "id")]
    pub id: String,
    /// Mapped from `sourceRef`.
    #[serde(rename = "sourceRef")]
    pub source_ref: String,
    /// Mapped from `targetRef`.
    #[serde(rename = "targetRef")]
    pub target_ref: String,
}

/// One `scriptTask` entry of a process.
#[derive(Debug, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct ScriptTask {
    /// Mapped from `id`, which the sample XML writes as an attribute.
    #[serde(rename = "id")]
    pub id: String,
    /// Mapped from `name`, also an attribute in the sample XML.
    #[serde(rename = "name")]
    pub name: String,
    /// Text of each `incoming` child; empty when there is none.
    #[serde(default)]
    pub incoming: Vec<String>,
    /// Text of each `outgoing` child; empty when there is none.
    #[serde(default)]
    pub outgoing: Vec<String>,
}

/// One `userTask` entry of a process.
#[derive(Debug, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct UserTask {
    /// Mapped from `id`, which the sample XML writes as an attribute.
    #[serde(rename = "id")]
    pub id: String,
    /// Mapped from `name`, also an attribute in the sample XML.
    #[serde(rename = "name")]
    pub name: String,
    /// Text of each `incoming` child; empty when there is none.
    #[serde(default)]
    pub incoming: Vec<String>,
    /// Text of each `outgoing` child; empty when there is none.
    #[serde(default)]
    pub outgoing: Vec<String>,
}

/// One `serviceTask` entry of a process.
#[derive(Debug, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct ServiceTask {
    /// Mapped from `id`, which the sample XML writes as an attribute.
    #[serde(rename = "id")]
    pub id: String,
    /// Mapped from `name`, also an attribute in the sample XML.
    #[serde(rename = "name")]
    pub name: String,
    /// Text of each `incoming` child; empty when there is none.
    #[serde(default)]
    pub incoming: Vec<String>,
    /// Text of each `outgoing` child; empty when there is none.
    #[serde(default)]
    pub outgoing: Vec<String>,
}

/// One `exclusiveGateway` entry of a process.
#[derive(Debug, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct ExclusiveGateway {
    /// Mapped from `id`, which the sample XML writes as an attribute.
    #[serde(rename = "id")]
    pub id: String,
    /// Mapped from `name`, also an attribute in the sample XML.
    #[serde(rename = "name")]
    pub name: String,
    /// Text of each `incoming` child; empty when there is none.
    #[serde(default)]
    pub incoming: Vec<String>,
    /// Text of each `outgoing` child (the sample gateway has two); empty when there is none.
    #[serde(default)]
    pub outgoing: Vec<String>,
}

/// One `endEvent` entry of a process.
#[derive(Debug, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct EndEvent {
    /// Mapped from `id`, which the sample XML writes as an attribute.
    #[serde(rename = "id")]
    pub id: String,
    /// Text of each `incoming` child; empty when there is none.
    #[serde(default)]
    pub incoming: Vec<String>,
}

my xml file:

<?xml version="1.0" encoding="UTF-8"?>
<bpmn:definitions xmlns:bpmn="http://www.omg.org/spec/BPMN/20100524/MODEL" xmlns:bpmndi="http://www.omg.org/spec/BPMN/20100524/DI" xmlns:dc="http://www.omg.org/spec/DD/20100524/DC" xmlns:di="http://www.omg.org/spec/DD/20100524/DI" xmlns:modeler="http://camunda.org/schema/modeler/1.0" id="Definitions_0o0mzbb" targetNamespace="http://bpmn.io/schema/bpmn" exporter="Camunda Modeler" exporterVersion="5.1.0" modeler:executionPlatform="Camunda Platform" modeler:executionPlatformVersion="7.17.0">
  <bpmn:process id="Process_0qdc9pk" isExecutable="true">
    <bpmn:startEvent id="StartEvent_1">
      <bpmn:outgoing>Flow_0sp9p0z</bpmn:outgoing>
    </bpmn:startEvent>
    <bpmn:sequenceFlow id="Flow_0sp9p0z" sourceRef="StartEvent_1" targetRef="Activity_102bfvd" /> 
    <bpmn:scriptTask id="Activity_102bfvd" name="A Script Task">
      <bpmn:incoming>Flow_0sp9p0z</bpmn:incoming>
      <bpmn:outgoing>Flow_1oq7kuv</bpmn:outgoing>
    </bpmn:scriptTask>
    <bpmn:scriptTask id="Activity_102bfgg" name="A Script Task">
      <bpmn:incoming>Flow_0sp9p0z</bpmn:incoming>
      <bpmn:outgoing>Flow_1oq7kuv</bpmn:outgoing>
    </bpmn:scriptTask>
    <bpmn:sequenceFlow id="Flow_testp0z" sourceRef="StartEvent_1" targetRef="Activity_102bfvd" /> 
    <bpmn:userTask id="Activity_0za2mys" name="A User Task">
      <bpmn:incoming>Flow_1lv617d</bpmn:incoming>
      <bpmn:outgoing>Flow_01raunh</bpmn:outgoing>
    </bpmn:userTask>
    <bpmn:serviceTask id="Activity_015xbrs" name="A Service Task">
      <bpmn:incoming>Flow_0f5r9p3</bpmn:incoming>
      <bpmn:outgoing>Flow_0l6i3pw</bpmn:outgoing>
    </bpmn:serviceTask>
    <bpmn:serviceTask id="Activity_01ii5xbrs" name="A Service Task">
      <bpmn:incoming>Flow_0f5r9p3</bpmn:incoming>
      <bpmn:outgoing>Flow_0l6i3pw</bpmn:outgoing>
    </bpmn:serviceTask>
    <bpmn:exclusiveGateway id="Gateway_0yccqkd" name="An Exclusive Gateway">
      <bpmn:incoming>Flow_1oq7kuv</bpmn:incoming>
      <bpmn:outgoing>Flow_1lv617d</bpmn:outgoing>
      <bpmn:outgoing>Flow_0f5r9p3</bpmn:outgoing>
    </bpmn:exclusiveGateway>
    <bpmn:endEvent id="Event_03v4c4g">
      <bpmn:incoming>Flow_01raunh</bpmn:incoming>
      <bpmn:incoming>Flow_0l6i3pw</bpmn:incoming>
    </bpmn:endEvent>
  </bpmn:process>
</bpmn:definitions>
Mingun commented 3 months ago

I cannot reproduce that. When I fixed your model (prepending `@` to the renames that correspond to attributes), everything worked as expected. So I recommend checking your XML / model for typos.

The full code

```rust
#[test]
fn issue791() {
    #[derive(Debug, Deserialize, Default, PartialEq)]
    #[serde(rename_all = "camelCase")]
    pub struct Definitions {
        #[serde(rename = "process", default)]
        pub processes: Vec<Process>,
    }

    #[derive(Debug, Deserialize, Default, PartialEq)]
    #[serde(rename_all = "camelCase")]
    pub struct Process {
        #[serde(rename = "@id")]
        pub id: String,
        #[serde(rename = "@isExecutable")]
        pub is_executable: bool,
        #[serde(rename = "startEvent", default)]
        pub start_events: Vec<StartEvent>,
        #[serde(rename = "sequenceFlow", default)]
        pub sequence_flows: Vec<SequenceFlow>,
        #[serde(rename = "scriptTask", default)]
        pub script_tasks: Vec<ScriptTask>,
        #[serde(rename = "userTask", default)]
        pub user_tasks: Vec<UserTask>,
        #[serde(rename = "serviceTask", default)]
        pub service_tasks: Vec<ServiceTask>,
        #[serde(rename = "exclusiveGateway", default)]
        pub exclusive_gateways: Vec<ExclusiveGateway>,
        #[serde(rename = "endEvent", default)]
        pub end_events: Vec<EndEvent>,
    }

    #[derive(Debug, Deserialize, Default, PartialEq)]
    #[serde(rename_all = "camelCase")]
    pub struct StartEvent {
        #[serde(rename = "@id")]
        pub id: String,
        #[serde(default)]
        pub outgoing: Vec<String>,
    }

    #[derive(Debug, Deserialize, Default, PartialEq)]
    #[serde(rename_all = "camelCase")]
    pub struct SequenceFlow {
        #[serde(rename = "@id")]
        pub id: String,
        #[serde(rename = "@sourceRef")]
        pub source_ref: String,
        #[serde(rename = "@targetRef")]
        pub target_ref: String,
    }

    #[derive(Debug, Deserialize, Default, PartialEq)]
    #[serde(rename_all = "camelCase")]
    pub struct ScriptTask {
        #[serde(rename = "@id")]
        pub id: String,
        #[serde(rename = "@name")]
        pub name: String,
        #[serde(default)]
        pub incoming: Vec<String>,
        #[serde(default)]
        pub outgoing: Vec<String>,
    }

    #[derive(Debug, Deserialize, Default, PartialEq)]
    #[serde(rename_all = "camelCase")]
    pub struct UserTask {
        #[serde(rename = "@id")]
        pub id: String,
        #[serde(rename = "@name")]
        pub name: String,
        #[serde(default)]
        pub incoming: Vec<String>,
        #[serde(default)]
        pub outgoing: Vec<String>,
    }

    #[derive(Debug, Deserialize, Default, PartialEq)]
    #[serde(rename_all = "camelCase")]
    pub struct ServiceTask {
        #[serde(rename = "@id")]
        pub id: String,
        #[serde(rename = "@name")]
        pub name: String,
        #[serde(default)]
        pub incoming: Vec<String>,
        #[serde(default)]
        pub outgoing: Vec<String>,
    }

    #[derive(Debug, Deserialize, Default, PartialEq)]
    #[serde(rename_all = "camelCase")]
    pub struct ExclusiveGateway {
        #[serde(rename = "@id")]
        pub id: String,
        #[serde(rename = "@name")]
        pub name: String,
        #[serde(default)]
        pub incoming: Vec<String>,
        #[serde(default)]
        pub outgoing: Vec<String>,
    }

    #[derive(Debug, Deserialize, Default, PartialEq)]
    #[serde(rename_all = "camelCase")]
    pub struct EndEvent {
        #[serde(rename = "@id")]
        pub id: String,
        #[serde(default)]
        pub incoming: Vec<String>,
    }

    let xml_data = r#"<?xml version="1.0" encoding="UTF-8"?>
<bpmn:definitions xmlns:bpmn="http://www.omg.org/spec/BPMN/20100524/MODEL" xmlns:bpmndi="http://www.omg.org/spec/BPMN/20100524/DI" xmlns:dc="http://www.omg.org/spec/DD/20100524/DC" xmlns:di="http://www.omg.org/spec/DD/20100524/DI" xmlns:modeler="http://camunda.org/schema/modeler/1.0" id="Definitions_0o0mzbb" targetNamespace="http://bpmn.io/schema/bpmn" exporter="Camunda Modeler" exporterVersion="5.1.0" modeler:executionPlatform="Camunda Platform" modeler:executionPlatformVersion="7.17.0">
  <bpmn:process id="Process_0qdc9pk" isExecutable="true">
    <bpmn:startEvent id="StartEvent_1">
      <bpmn:outgoing>Flow_0sp9p0z</bpmn:outgoing>
    </bpmn:startEvent>
    <bpmn:sequenceFlow id="Flow_0sp9p0z" sourceRef="StartEvent_1" targetRef="Activity_102bfvd" />
    <bpmn:scriptTask id="Activity_102bfvd" name="A Script Task">
      <bpmn:incoming>Flow_0sp9p0z</bpmn:incoming>
      <bpmn:outgoing>Flow_1oq7kuv</bpmn:outgoing>
    </bpmn:scriptTask>
    <bpmn:scriptTask id="Activity_102bfgg" name="A Script Task">
      <bpmn:incoming>Flow_0sp9p0z</bpmn:incoming>
      <bpmn:outgoing>Flow_1oq7kuv</bpmn:outgoing>
    </bpmn:scriptTask>
    <bpmn:sequenceFlow id="Flow_testp0z" sourceRef="StartEvent_1" targetRef="Activity_102bfvd" />
    <bpmn:userTask id="Activity_0za2mys" name="A User Task">
      <bpmn:incoming>Flow_1lv617d</bpmn:incoming>
      <bpmn:outgoing>Flow_01raunh</bpmn:outgoing>
    </bpmn:userTask>
    <bpmn:serviceTask id="Activity_015xbrs" name="A Service Task">
      <bpmn:incoming>Flow_0f5r9p3</bpmn:incoming>
      <bpmn:outgoing>Flow_0l6i3pw</bpmn:outgoing>
    </bpmn:serviceTask>
    <bpmn:serviceTask id="Activity_01ii5xbrs" name="A Service Task">
      <bpmn:incoming>Flow_0f5r9p3</bpmn:incoming>
      <bpmn:outgoing>Flow_0l6i3pw</bpmn:outgoing>
    </bpmn:serviceTask>
    <bpmn:exclusiveGateway id="Gateway_0yccqkd" name="An Exclusive Gateway">
      <bpmn:incoming>Flow_1oq7kuv</bpmn:incoming>
      <bpmn:outgoing>Flow_1lv617d</bpmn:outgoing>
      <bpmn:outgoing>Flow_0f5r9p3</bpmn:outgoing>
    </bpmn:exclusiveGateway>
    <bpmn:endEvent id="Event_03v4c4g">
      <bpmn:incoming>Flow_01raunh</bpmn:incoming>
      <bpmn:incoming>Flow_0l6i3pw</bpmn:incoming>
    </bpmn:endEvent>
  </bpmn:process>
</bpmn:definitions>
"#;

    // Parse the XML data
    let definitions: Definitions =
        quick_xml::de::from_reader(xml_data.as_bytes()).expect("Failed to parse BPMN XML");

    assert_eq!(
        definitions,
        Definitions {
            processes: vec![Process {
                id: "Process_0qdc9pk".into(),
                is_executable: true,
                start_events: vec![StartEvent {
                    id: "StartEvent_1".into(),
                    outgoing: vec!["Flow_0sp9p0z".into()],
                }],
                sequence_flows: vec![
                    SequenceFlow {
                        id: "Flow_0sp9p0z".into(),
                        source_ref: "StartEvent_1".into(),
                        target_ref: "Activity_102bfvd".into(),
                    },
                    SequenceFlow {
                        id: "Flow_testp0z".into(),
                        source_ref: "StartEvent_1".into(),
                        target_ref: "Activity_102bfvd".into(),
                    },
                ],
                script_tasks: vec![
                    ScriptTask {
                        id: "Activity_102bfvd".into(),
                        name: "A Script Task".into(),
                        incoming: vec!["Flow_0sp9p0z".into()],
                        outgoing: vec!["Flow_1oq7kuv".into()],
                    },
                    ScriptTask {
                        id: "Activity_102bfgg".into(),
                        name: "A Script Task".into(),
                        incoming: vec!["Flow_0sp9p0z".into()],
                        outgoing: vec!["Flow_1oq7kuv".into()],
                    },
                ],
                user_tasks: vec![UserTask {
                    id: "Activity_0za2mys".into(),
                    name: "A User Task".into(),
                    incoming: vec!["Flow_1lv617d".into()],
                    outgoing: vec!["Flow_01raunh".into()],
                }],
                service_tasks: vec![
                    ServiceTask {
                        id: "Activity_015xbrs".into(),
                        name: "A Service Task".into(),
                        incoming: vec!["Flow_0f5r9p3".into()],
                        outgoing: vec!["Flow_0l6i3pw".into()],
                    },
                    ServiceTask {
                        id: "Activity_01ii5xbrs".into(),
                        name: "A Service Task".into(),
                        incoming: vec!["Flow_0f5r9p3".into()],
                        outgoing: vec!["Flow_0l6i3pw".into()],
                    },
                ],
                exclusive_gateways: vec![ExclusiveGateway {
                    id: "Gateway_0yccqkd".into(),
                    name: "An Exclusive Gateway".into(),
                    incoming: vec!["Flow_1oq7kuv".into()],
                    outgoing: vec!["Flow_1lv617d".into(), "Flow_0f5r9p3".into()],
                }],
                end_events: vec![EndEvent {
                    id: "Event_03v4c4g".into(),
                    incoming: vec!["Flow_01raunh".into(), "Flow_0l6i3pw".into()],
                }],
            }]
        }
    );
}
```
snoop244 commented 3 months ago

Thanks. I had tried adding back the "@" for attributes, but it gave me a different error. Your working code used the proper quick_xml::de::from_reader(), whereas mine mistakenly used serde_xml_rs's from_reader(). When I changed that, it worked as expected.

Thanks for the very speedy response.