bazaarvoice / jolt

JSON to JSON transformation library written in Java.
Apache License 2.0
1.56k stars 329 forks source link

Merge sub-arrays having the same main items #1245

Closed Chris-Fri closed 8 months ago

Chris-Fri commented 8 months ago

I have the following JSON:

{
  "data": {
    "designGeneral": [
      {
        "id": "https://id.com/a6/25",
        "designedFor": {
          "id": "https://id.com/a2/758",
          "title": "title-758"
        },
        "hasPart": [
          {
            "id": "https://id.com/a6/activity/53",
            "containsActivityDescription": [
              {
                "id": "https://id.com/a6/subActivity/25",
                "hasPart": [
                  {
                    "id": "https://id.com/a6/nominalAct/17",
                    "agent": {
                      "localDescriptors": [
                        {
                          "localIdentifier": "0052807-000"
                        }
                      ]
                    },
                    "specifies": {
                      "localDescriptors": {
                        "localIdentifier": "0052807-000-071"
                      }
                    }
                  }
                ],
                "summary": "(Active) 0052807-000 @10"
              }
            ]
          }
        ]
      },
      {
        "id": "https://id.com/a6/41",
        "designedFor": {
          "id": "https://id.com/a2/831",
          "title": "title-831"
        },
        "hasPart": [
          {
            "id": "https://id.com/a6/activity/149",
            "containsActivityDescription": [
              {
                "id": "https://id.com/a6/subActivity/61",
                "hasPart": [
                  {
                    "id": "https://id.com/a6/nominalAct/41",
                    "agent": {
                      "localDescriptors": [
                        {
                          "localIdentifier": "0052807-000"
                        }
                      ]
                    },
                    "specifies": {
                      "localDescriptors": {
                        "localIdentifier": "0052807-000-071"
                      }
                    }
                  }
                ],
                "summary": "(Active) 0052807-000 @10"
              }
            ]
          }
        ]
      },
      {
        "id": "https://id.com/a6/42",
        "designedFor": {
          "id": "https://id.com/a2/832",
          "title": "title-832"
        },
        "hasPart": [
          {
            "id": "https://id.com/a6/activity/152",
            "containsActivityDescription": [
              {
                "id": "https://id.com/a6/subActivity/63",
                "hasPart": [
                  {
                    "id": "https://id.com/a6/nominalAct/42",
                    "agent": {
                      "localDescriptors": [
                        {
                          "localIdentifier": "0052807-000"
                        }
                      ]
                    },
                    "specifies": {
                      "localDescriptors": {
                        "localIdentifier": "0052807-000-071"
                      }
                    }
                  }
                ],
                "summary": "(Active) 0052807-000 @1"
              }
            ]
          }
        ]
      },
      {
        "id": "https://id.com/a6/42",
        "designedFor": {
          "id": "https://id.com/a2/832",
          "title": "title-832"
        },
        "hasPart": [
          {
            "id": "https://id.com/a6/activity/153",
            "containsActivityDescription": [
              {
                "id": "https://id.com/a6/subActivity/64",
                "hasPart": [
                  {
                    "id": "https://id.com/a6/nominalAct/43",
                    "agent": {
                      "localDescriptors": [
                        {
                          "localIdentifier": "0052807-000"
                        }
                      ]
                    },
                    "specifies": {
                      "localDescriptors": {
                        "localIdentifier": "0052807-000-071"
                      }
                    }
                  }
                ],
                "summary": "(Active) 0052807-000 @3"
              }
            ]
          }
        ]
      }
    ]
  }
}

and I would like to transform it into the JSON shown further below, in which the last two entries are merged because they share the same "id" and "designedFor" values:

"id": "https://id.com/a6/42",
        "designedFor": {
          "id": "https://id.com/a2/832",
          "title": "title-832"
        },

Desired output:

{
  "data": {
    "designGeneral": [
      {
        "id": "https://id.com/a6/25",
        "designedFor": {
          "id": "https://id.com/a2/758",
          "title": "title-758"
        },
        "hasPart": [
          {
            "id": "https://id.com/a6/activity/53",
            "containsActivityDescription": [
              {
                "id": "https://id.com/a6/subActivity/25",
                "hasPart": [
                  {
                    "id": "https://id.com/a6/nominalAct/17",
                    "agent": {
                      "localDescriptors": [
                        {
                          "localIdentifier": "0052807-000"
                        }
                      ]
                    },
                    "specifies": {
                      "localDescriptors": {
                        "localIdentifier": "0052807-000-071"
                      }
                    }
                  }
                ],
                "summary": "(Active) 0052807-000 @10"
              }
            ]
          }
        ]
      },
      {
        "id": "https://id.com/a6/41",
        "designedFor": {
          "id": "https://id.com/a2/831",
          "title": "title-831"
        },
        "hasPart": [
          {
            "id": "https://id.com/a6/activity/149",
            "containsActivityDescription": [
              {
                "id": "https://id.com/a6/subActivity/61",
                "hasPart": [
                  {
                    "id": "https://id.com/a6/nominalAct/41",
                    "agent": {
                      "localDescriptors": [
                        {
                          "localIdentifier": "0052807-000"
                        }
                      ]
                    },
                    "specifies": {
                      "localDescriptors": {
                        "localIdentifier": "0052807-000-071"
                      }
                    }
                  }
                ],
                "summary": "(Active) 0052807-000 @10"
              }
            ]
          }
        ]
      },
      {
        "id": "https://id.com/a6/42",
        "designedFor": {
          "id": "https://id.com/a2/832",
          "title": "title-832"
        },
        "hasPart": [
          {
            "id": "https://id.com/a6/activity/152",
            "containsActivityDescription": [
              {
                "id": "https://id.com/a6/subActivity/63",
                "hasPart": [
                  {
                    "id": "https://id.com/a6/nominalAct/42",
                    "agent": {
                      "localDescriptors": [
                        {
                          "localIdentifier": "0052807-000"
                        }
                      ]
                    },
                    "specifies": {
                      "localDescriptors": {
                        "localIdentifier": "0052807-000-071"
                      }
                    }
                  }
                ],
                "summary": "(Active) 0052807-000 @1"
              }
            ]
          },
          {
            "id": "https://id.com/a6/activity/153",
            "containsActivityDescription": [
              {
                "id": "https://id.com/a6/subActivity/64",
                "hasPart": [
                  {
                    "id": "https://id.com/a6/nominalAct/43",
                    "agent": {
                      "localDescriptors": [
                        {
                          "localIdentifier": "0052807-000"
                        }
                      ]
                    },
                    "specifies": {
                      "localDescriptors": {
                        "localIdentifier": "0052807-000-071"
                      }
                    }
                  }
                ],
                "summary": "(Active) 0052807-000 @3"
              }
            ]
          }         
        ]
      }
    ]
  }
}

More generally, of course, the transformation should work on any such JSON file and always merge the "hasPart" arrays of those "designGeneral" items whose "id" and "designedFor" are the same.

gbouget commented 8 months ago

It's probably possible to reduce the number of Jolt operations, but this spec works:

[
  { // group designGeneral items by id, then by designedFor.id
    "operation": "shift",
    "spec": {
      "data": {
        "designGeneral": {
          "*": { // index array of designGeneral
            "*": "data.designGeneral.@(1,id).@(1,designedFor.id).&",
            "hasPart": {
              "*": { // index array of hasPart
                "*": "data.designGeneral.@(3,id).@(3,designedFor.id).hasPart.@(1,id).&"
              }
            }
          }
        }
      }
    }
  },
  { // convert the grouped designGeneral map back into an array
    "operation": "shift",
    "spec": {
      "data": {
        "designGeneral": {
          "*": { // id
            "*": "data.designGeneral[]"
          }
        }
      }
    }
  },
  { // convert each item's grouped hasPart map back into an array
    "operation": "shift",
    "spec": {
      "data": {
        "designGeneral": {
          "*": { // index array
            "*": "data.designGeneral[&1].&",
            "hasPart": {
              "*": "data.designGeneral[&2].&1[]"
            }
          }
        }
      }
    }
  },
  { // get only first element of id and designedFor (delete duplicate values)
    "operation": "cardinality",
    "spec": {
      "data": {
        "designGeneral": {
          "*": { // index array
            "id": "ONE",
            "designedFor": "ONE"
          }
        }
      }
    }
  }
]

image

Chris-Fri commented 8 months ago

That's great! Thanks a lot, @gbouget