Process Builder - Githubissues

m-mohr commented 3 years ago

A JS process builder, like in the Python or R clients. Feedback and suggestions are highly appreciated.

ToDos:

[ ] Migrate Math Parser from Web Editor to JS Client
[x] Write documentation

Example, creates the EVI process graph we usually use:

// Obtain a process builder from `con` (presumably an established back-end connection).
var builder = await con.buildProcess();

// load_collection arguments, in order: collection ID, spatial extent, temporal extent, bands.
// The collection ID is passed as a process parameter (name, schema type, description)
// instead of a fixed value, so it ends up as an unresolved "from_parameter" reference.
var datacube = builder.load_collection(
  new Parameter("collection-id", "string", "The ID of the collection to load"),
  { "west": 16.1, "east": 16.6, "north": 48.6, "south": 47.2 },
  ["2018-01-01", "2018-02-01"],
  ["B02", "B04", "B08"]
);

// Alternative 1: Use the Math Parser known from the Web Editor for Math (WIP)
// NOTE(review): `Math` must resolve to the client's (WIP) math parser class here. The built-in
// JavaScript `Math` object is not a constructor, so `new Math(...)` throws a TypeError
// unless the name is shadowed by an import - confirm the final class name.
var eviAlgorithm = new Math('2.5 * ($B08 - $B04) / (1 + $B08 + 6 * $B04 + (-7.5 * $B02))');
// Reduce the "bands" dimension with the EVI formula and attach a description to that node.
datacube = builder.reduce_dimension(datacube, eviAlgorithm, "bands")
                  .description("Compute the EVI. Formula: 2.5 * (NIR - RED) / (1 + NIR + 6*RED + -7.5*BLUE)");

// Reducer callback that calls the `min` process through `this`. It is written as a regular
// function because an arrow function would not receive its own `this`.
var min = function(data) { return this.min(data); };
// Reduce the "t" dimension by taking the minimum.
datacube = builder.reduce_dimension(datacube, min, "t");

// Final node: save the result as PNG.
datacube = builder.save_result(datacube, "PNG");

// Store the built process under the ID "evi".
var storedProcess = await con.setUserProcess("evi", datacube);

You can also construct the eviAlgorithm yourself:

// Alternative 2: Construct the process graph for the EVI yourself
/**
 * Reducer callback computing the EVI:
 * 2.5 * (NIR - RED) / (1 + NIR + 6*RED + -7.5*BLUE).
 *
 * Must be invoked with `this` bound to an object that offers `multiply`,
 * `divide`, `subtract` and `sum`, which is why it is a regular function and
 * not an arrow function.
 *
 * @param {object} data - Band values, read by the labels "B08", "B04" and "B02".
 * @returns {*} Whatever the outermost `this.multiply` call returns.
 */
var eviAlgorithm = function(data) {
  // Band values are read in the order NIR (B08), RED (B04), BLUE (B02).
  const { B08: nir, B04: red, B02: blue } = data;

  // Numerator: NIR - RED
  const numerator = this.subtract(nir, red);

  // Denominator: 1 + NIR + 6*RED + (-7.5*BLUE)
  const weightedRed = this.multiply(6, red);
  const weightedBlue = this.multiply(-7.5, blue);
  const denominator = this.sum([1, nir, weightedRed, weightedBlue]);

  // EVI = 2.5 * numerator / denominator
  const ratio = this.divide(numerator, denominator);
  return this.multiply(2.5, ratio);
};

Resulting process graph:

{
  "id": "evi",
  "parameters": [
    {
      "name": "collection-id",
      "description": "The ID of the collection to load",
      "schema": {
        "type": "string"
      }
    }
  ],
  "process_graph": {
    "loadco1": {
      "process_id": "load_collection",
      "arguments": {
        "id": {"from_parameter": "collection-id"},
        "spatial_extent": {
          "west": 16.1,
          "east": 16.6,
          "north": 48.6,
          "south": 47.2
        },
        "temporal_extent": ["2018-01-01", "2018-02-01"],
        "bands": ["B02", "B04", "B08"]
      }
    },
    "reduce1": {
      "process_id": "reduce_dimension",
      "description": "Compute the EVI. Formula: 2.5 * (NIR - RED) / (1 + NIR + 6*RED + -7.5*BLUE)",
      "arguments": {
        "data": {"from_node": "loadco1"},
        "dimension": "bands",
        "reducer": {
          "process_graph": {
            "arraye1": {
              "process_id": "array_element",
              "arguments": {
                "data": {"from_parameter": "data"},
                "label": "B08"
              }
            },
            "arraye2": {
              "process_id": "array_element",
              "arguments": {
                "data": {"from_parameter": "data"},
                "label": "B04"
              }
            },
            "arraye3": {
              "process_id": "array_element",
              "arguments": {
                "data": {"from_parameter": "data"},
                "label": "B02"
              }
            },
            "subtra1": {
              "process_id": "subtract",
              "arguments": {
                "x": {"from_node": "arraye1"},
                "y": {"from_node": "arraye2"}
              }
            },
            "multip1": {
              "process_id": "multiply",
              "arguments": {
                "x": 6,
                "y": {"from_node": "arraye2"}
              }
            },
            "multip2": {
              "process_id": "multiply",
              "arguments": {
                "x": -7.5,
                "y": {"from_node": "arraye3"}
              }
            },
            "sum1": {
              "process_id": "sum",
              "arguments": {
                "data": [1, {"from_node": "arraye1"}, {"from_node": "multip1"}, {"from_node": "multip2"}]
              }
            },
            "divide1": {
              "process_id": "divide",
              "arguments": {
                "x": {"from_node": "subtra1"},
                "y": {"from_node": "sum1"}
              }
            },
            "multip3": {
              "process_id": "multiply",
              "arguments": {
                "x": 2.5,
                "y": {"from_node": "divide1"}
              },
              "result": true
            }
          }
        }
      }
    },
    "reduce2": {
      "process_id": "reduce_dimension",
      "arguments": {
        "data": {"from_node": "reduce1"},
        "dimension": "t",
        "reducer": {
          "process_graph": {
            "min1": {
              "process_id": "min",
              "arguments": {
                "data": {"from_parameter": "data"}
              },
              "result": true
            }
          }
        }
      }
    },
    "savere1": {
      "process_id": "save_result",
      "arguments": {
        "data": {"from_node": "reduce2"},
        "format": "PNG"
      },
      "result": true
    }
  }
}

bgoesswe commented 3 years ago

I really like it, it is similar to the one in the Python client and makes it much easier to work with openEO imho. Just two things:

Is it possible to provide something static for the dimensions like "bands", "t"? e.g. something like datacube.time, so that the client loads the concrete names from the backend and the user does not have to know how it is called there?
I am in general a little bit confused by new Parameter("collection-id", "string", "The ID of the collection to load") at "load_collection" and why this is necessary or at the same time not necessary at the other parameters (e.g. spatial extent). Is this just a placeholder?

Other than that I have nothing to complain about^^

m-mohr commented 3 years ago

Is it possible to provide something static for the dimensions like "bands", "t"? e.g. something like datacube.time, so that the client loads the concrete names from the backend and the user does not have to know how it is called there?

@bgoesswe No, not really. You can't load the dimension names reliably from the back-end for every step, it may be possible just for some steps if names have not changed from what they are in the original collection metadata. If that's implemented in Python, it implements it with some assumptions that are likely to break for some use cases. For example, it won't work in my example above as the collection id is unknown (it's a parameter, see below). Therefore, I'm not planning to implement that.

I am in general a little bit confused by new Parameter("collection-id", "string", "The ID of the collection to load") at "load_collection" and why this is necessary or at the same time not necessary at the other parameters (e.g. spatial extent). Is this just a placeholder?

That is just an example of a process parameter, so that one could change the collection-id. It is meant to show how you can create those unresolved "from_parameter" references.

bgoesswe commented 3 years ago

Ah, I see. OK, in this case it is not really possible. Afaik, the Python client takes the default dimensions at the moment you call load_collection, but tracks changes to the dimensions made by the applied processes and also throws exceptions if a dimension is accessed wrongly or does not exist anymore. So the metadata is stored in the datacube object. But I understand that this is not really suitable for the JS client.

m-mohr commented 3 years ago

Yes, Python implements all processes (and thus it's limited to them), but the JS client (and R, too) uses a more generic approach, just generating functions from the processes, and thus the implementation is very lightweight and flexible. JS in that case can't keep track of data cube changes.

jneijt commented 3 years ago

I like it a lot as well, looks very promising and easy to use. Would have spared me from many typos writing the JSON manually in the beginning :-)

One more question just for my understanding though: If I understand it right, the Builder only "knows about" the processes supported by the backend, right? So then I guess it's not possible to use it to build a kind of "generic" process graph while not being connected to any backend?

m-mohr commented 3 years ago

If I understand it right, the Builder only "knows about" the processes supported by the backend, right?

@jneijt An interesting idea. I didn't have that in mind, but was able to quickly implement it. It's now possible to request the processes from (a) a URL or (b) a specific version of the processes (it then requests them from the official repository).

// The Builder class is imported directly from the client package; no connection object is used in this snippet.
import { Builder } from '@openeo/js-client';
// NOTE(review): fromVersion() has to fetch the process definitions remotely, so it presumably
// returns a Promise that needs to be awaited - confirm against the client API.
var builder = Builder.fromVersion(); // Loads the latest version if no version is passed
// `...` is a placeholder for the actual arguments, not valid syntax as written.
var datacube = builder.load_collection(...); // and so on...

So then I guess it's not possible to use it to build a kind of "generic" process graph while not being connected to any backend?

You need a set of processes for certain operations, but you can now at least use the official repo in a simple way as "back-end". Does that help?

jneijt commented 3 years ago

@m-mohr That looks very nice! As a bit of background why I thought of it: in the mobile app we have a general process graph that's used on all backends and only when it's requested, it is actually validated if the processes are supported. And of course this is not the main use-case for this feature but still I was thinking that it might be nice to be able to generate "standard"/"general" process graphs. Very cool addition, thanks! 👍

m-mohr commented 3 years ago

Thanks for the reviews. I'll merge this now and do the Math expression thing in a separate PR.

Open-EO / openeo-js-client

Process Builder #24