schibsted / jslt

JSON query and transformation language
Apache License 2.0
638 stars 120 forks source link

Is it possible to check the last four characters of the current array element and start a new group when the last four characters of the next element (index+1) are different? #330

Closed jricardogon closed 9 months ago

jricardogon commented 9 months ago

Example: I have one array. If a value has the same last four characters as the previous value, it belongs to the same group; if not, it starts a new group. Input: { "data" : ["AAAAAAAAAA1111","AAAAAAAAAA1111", "BBBBBBBBBB2222", "BBBBBBBBBB2222", "AAAAAAAAAA1111","AAAAAAAAAA1111", "AAAAAAAAAA1111"] }

Output expected:

[
  { "group": "1111",   -- last four characters
    "values": [ "AAAAAAAAAA1111", "AAAAAAAAAA1111" ] },
  { "group": "2222",
    "values": [ "BBBBBBBBBB2222", "BBBBBBBBBB2222" ] },
  { "group": "1111",
    "values": [ "AAAAAAAAAA1111", "AAAAAAAAAA1111", "AAAAAAAAAA1111" ] }

]

tks for help.

larsga commented 9 months ago

This Array[index][-4 : ] will give you the last four characters of a string.

The easiest way to do the grouping is probably to write a recursive function. If the ends of the strings are the same, add to the current group. If they're different, start a new group.

jricardogon commented 9 months ago

This is the closest I have managed to get. I am having a lot of difficulty because it is not possible to create a variable and then append (concatenate) values to it :(

I'm using it in this component (https://docs.digibee.com/documentation/v/pt-br/components/tools/jslt)

// First attempt: tag every element of .data with a "child" value and, at some
// positions, a "root" (group suffix). It does not build the grouped output yet.

// pair every element of .data with its position; the code below reads the
// .index and .value fields of each pair
let arrayIndexed = zip-with-index(.data)

//let array = {[size($arrayIndexed)]}

// stub: always returns the empty string and is not called anywhere in this
// snippet; the commented-out expressions are abandoned ideas for its body
def child(value, index) "" // $arrayIndexed[$index]

//[for($arrayIndexed[]) .value if(.index <= $index and .value[ -4: ] == $value)]

// one output object per input element
let test = [for ($arrayIndexed)

let idx = .index
// last four characters of this element and of the following element
let groupCurrentLine =  .value[ -4 : ]
// NOTE(review): for the last element $idx + 1 is out of range; the null check
// below suggests this evaluates to null -- confirm against JSLT indexing rules
let groupNextLine =  $arrayIndexed[$idx + 1].value[ -4: ]

  // the first element always carries its own suffix as "root"
  if( $idx == 0)      
      {"root": $groupCurrentLine , "child": .value }   

  else
     // the suffix changes after this element
     if($groupCurrentLine != $groupNextLine)

       // the "root" stored here is the suffix of the NEXT element, not of
       // this one
       if($groupNextLine != null)
         { "child": .value , "root": $groupNextLine } 
       else
         {"child": .value }
     // same suffix as the next element: no "root"
     else
       {"child": .value }    

]

// the "root" of every entry of $test
// NOTE(review): entries built without "root" presumably contribute null here
// -- verify
let final = [for ($test) .root ]

{"final": $final, "test": $test}

catull commented 9 months ago

Try this:

// Group consecutive items of an array by their last four characters.
//
// Parameters:
//   "array"        - the items still to be processed
//   "currentGroup" - the group name for which matching items are being collected
//   "groups"       - all groups identified so far
//   "members"      - the items collected so far for "currentGroup"
//
// Returns "groups" extended with one { "group", "values" } object per run of
// consecutive items that share the same last four characters.
//
// The function consumes one item and calls itself recursively with a reduced
// array. In each iteration there are three possibilities:
//   a) the array is exhausted
//      => flush the pending group (if there is one) and return the result
//   b) the item's group name matches "currentGroup"
//      => collect the item into the same group
//   c) the group names differ
//      => the previous group is added to "groups" with its collected members
//      => start a new group collection
def process (array, currentGroup, groups, members)
  let itemName = $array[0]
  let itemGroup = $itemName[-4:]

  // no more items and nothing collected: this only happens when the input
  // was empty to begin with, so do not emit a group without members
  if (not($array) and not($members))
    $groups

  // last iteration, no more items
  // the last group is collected but not included in the result yet!
  // => add it to the groups
  else if (not($array))
    $groups + [ { "group": $currentGroup, "values": $members }]

  // item is a member of the current group
  // add it to the $members
  else if ($itemGroup == $currentGroup)
    process ($array[1:], $currentGroup, $groups, $members + [ $itemName ])

  // group names do NOT match
  // emit the collected members of the previous group to $groups
  // start collecting for the new group
  else
    process ($array[1:], $itemGroup, $groups + [ { "group": $currentGroup, "values": $members }], [ $itemName ])

// entry point: the first item's last four characters seed the current group,
// and both accumulators ("groups" and "members") start out empty
let items = .data
process ($items, $items[0][-4:], [], [])

For this input

{
  "data": [
    "AAAAAAAAAA1111",
    "AAAAAAAAAA1111",
    "BBBBBBBBBB2222",
    "BBBBBBBBBB2222",
    "EEEEEEEEEE9999",
    "EEEEEEEEEE9999",
    "CCCCCCCCCC5555",
    "CCCCCCCCCC3333",
    "CCCCCCCCCC3333",
    "DDDDDDDDDD7777",
    "DDDDDDDDDD7777"
  ]
}

I get:

[ {
  "group" : "1111",
  "values" : [ "AAAAAAAAAA1111", "AAAAAAAAAA1111" ]
}, {
  "group" : "2222",
  "values" : [ "BBBBBBBBBB2222", "BBBBBBBBBB2222" ]
}, {
  "group" : "9999",
  "values" : [ "EEEEEEEEEE9999", "EEEEEEEEEE9999" ]
}, {
  "group" : "5555",
  "values" : [ "CCCCCCCCCC5555" ]
}, {
  "group" : "3333",
  "values" : [ "CCCCCCCCCC3333", "CCCCCCCCCC3333" ]
}, {
  "group" : "7777",
  "values" : [ "DDDDDDDDDD7777", "DDDDDDDDDD7777" ]
} ]
jricardogon commented 9 months ago

Thank you so much! This is exactly what I need. After many tries I came up with this:

// pair every element of .data with its position; the rest of the script reads
// the .index and .value fields of each pair
let arrayIndexed = zip-with-index(.data)

// Returns the values of the entries of $arrayIndexed whose index lies in the
// inclusive range [start, end].
def children(start, end )
  // debugging aid, kept on its own line so that it does not comment out the
  // function body: $start+ " - "+ $end
  [for($arrayIndexed) .value if(.index >= $start and .index <= $end )]

// Break points: one { "idx", "root" } record for every element whose last four
// characters differ from those of the following element, i.e. the last element
// of each run. The records are then paired with their own position so that
// each one can look up the record before it.
let idxBreak = zip-with-index([for ($arrayIndexed)

let idx = .index
let groupCurrentLine =  .value[ -4 : ]
// NOTE(review): for the last element $idx + 1 is out of range; this presumably
// evaluates to null, so the last element is always kept -- confirm
let groupNextLine =  $arrayIndexed[$idx + 1].value[ -4: ]

{"idx": $idx , "root" :  $groupCurrentLine }
// comprehension filter: keep the record only where the suffix changes
if( $groupCurrentLine!= $groupNextLine)

])

// One group per break point: its children run from the element after the
// previous break point (or from index 0 for the first group) up to and
// including the break point itself.
let final = [for($idxBreak) if(.index == 0) {"root" : .value.root, "children": children(0, .value.idx) } else {"root" : .value.root, "children": children($idxBreak[.index - 1].value.idx + 1, .value.idx) } ]

{ "final" : $final }