IBM / tensorlakehouse-openeo-driver

IBM's OpenSource reference implementation of the OpenEO driver
Apache License 2.0
6 stars 2 forks source link

support UDF #18

Open leotizzei opened 1 month ago

leotizzei commented 1 month ago

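Provide support for UDFs (see https://openeo.org/documentation/1.0/udfs.html#user-defined-functions). It seems tensorlakehouse-openeo-driver needs to register the `run_udf` process in its process registry; at the moment, evaluating a process graph that uses it fails with the following error: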

Traceback (most recent call last):
  File "/opt/app-root/lib64/python3.9/site-packages/flask/app.py", line 1484, in full_dispatch_request
    rv = self.dispatch_request()
  File "/opt/app-root/lib64/python3.9/site-packages/flask/app.py", line 1469, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)
  File "/opt/app-root/lib64/python3.9/site-packages/openeo_driver/users/auth.py", line 88, in decorated
    return f(*args, **kwargs)
  File "/opt/app-root/lib64/python3.9/site-packages/openeo_driver/views.py", line 676, in result
    result = backend_implementation.processing.evaluate(process_graph=process_graph, env=env)
  File "/opt/app-root/src/tensorlakehouse-openeo-driver/tensorlakehouse_openeo_driver/processing.py", line 114, in evaluate
    pg_callable = parsed_graph.to_callable(process_registry=self.process_registry)
  File "/opt/app-root/lib64/python3.9/site-packages/openeo_pg_parser_networkx/graph.py", line 318, in to_callable
    return self._map_node_to_callable(
  File "/opt/app-root/lib64/python3.9/site-packages/openeo_pg_parser_networkx/graph.py", line 347, in _map_node_to_callable
    parent_callable = self._map_node_to_callable(
  File "/opt/app-root/lib64/python3.9/site-packages/openeo_pg_parser_networkx/graph.py", line 355, in _map_node_to_callable
    callback = self._map_node_to_callable(
  File "/opt/app-root/lib64/python3.9/site-packages/openeo_pg_parser_networkx/graph.py", line 340, in _map_node_to_callable
    process_impl = process_registry[node_with_data["process_id"]].implementation
  File "/opt/app-root/lib64/python3.9/site-packages/openeo_pg_parser_networkx/process_registry.py", line 49, in __getitem__
    raise KeyError(
KeyError: 'Process run_udf not found in namespace predefined!'
2024-06-11 20:36:15,718 - openeo_driver.views - INFO - Handling GET https://tensorlakehouse-openeo-driver-nasageospatial-dev.cash.sl.cloud9.ibm.com/.well-known/openeo with data b''
leotizzei commented 1 week ago

Example of how to run a UDF with the openEO Python client: https://open-eo.github.io/openeo-python-client/udf.html#workflow-script

leotizzei commented 1 week ago

I'm getting a weird error when I pass the function as inline source code (a string) instead of loading it from a file:

./tensorlakehouse_openeo_driver/tests/end_to_end/test_udf.py::test_udf Failed: [undefined]openeo.rest.OpenEoClientException: Failed to detect language of UDF code.
openeo_client = <Connection to 'https://tensorlakehouse-openeo-driver-geospatial-be-staging.apps.fmaas-backend.fmaas.res.ibm.com/openeo/1.2/' with OidcBearerAuth>

    def test_udf(openeo_client: openeo.Connection):
        spatial_extent = {
            "west": -121.5,
            "south": 44.0,
            "east": -121.25,
            "north": 44.25,
        }
        temporal_extent = [
            "2022-01-02T00:00:00Z",
            "2022-01-02T23:59:59Z",
        ]
        bands = ["B02"]
        datacube = openeo_client.load_collection(
            "HLSS30",
            spatial_extent=spatial_extent,
            temporal_extent=temporal_extent,
            bands=bands,
        )

        # Create a UDF object from inline source code.

        udf = openeo.UDF("""

        import xarray

        def apply_datacube(cube: xarray.DataArray, context: dict) -> xarray.DataArray:

            cube.values = 0.0001 * cube.values

            return cube

        """)
        # udf_script_path = (
        #     Path()
        #     / "tensorlakehouse_openeo_driver"
        #     / "tests"
        #     / "end_to_end"
        #     / "udf-code.py"
        # )
        # assert udf_script_path.exists()
        # udf = openeo.UDF.from_file(udf_script_path)
        # Pass UDF object as child process to `apply`.

>       rescaled = datacube.apply(process=udf)

tensorlakehouse_openeo_driver/tests/end_to_end/test_udf.py:52: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
../../../../.pyenv/versions/3.9.16/envs/tensorlakehouse-ee/lib/python3.9/site-packages/openeo/rest/datacube.py:1500: in apply
    "process": build_child_callback(process, parent_parameters=["x"], connection=self.connection),
../../../../.pyenv/versions/3.9.16/envs/tensorlakehouse-ee/lib/python3.9/site-packages/openeo/rest/_datacube.py:315: in build_child_callback
    pg = process.get_run_udf_callback(connection=connection, data_parameter=parent_parameters[0])
../../../../.pyenv/versions/3.9.16/envs/tensorlakehouse-ee/lib/python3.9/site-packages/openeo/rest/_datacube.py:266: in get_run_udf_callback
    runtime=self.get_runtime(connection=connection),
../../../../.pyenv/versions/3.9.16/envs/tensorlakehouse-ee/lib/python3.9/site-packages/openeo/rest/_datacube.py:172: in get_runtime
    return self._runtime or self._guess_runtime(connection=connection)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <UDF runtime=None code='\n\n    import xarray\n\n\n    def apply_datacube(cube: xarray.DataArray, context: dict) -> xarray.DataArray:\n\n        cube.values = 0.0001 * cube.values\n\n        return cube\n\n    '>
connection = <Connection to 'https://tensorlakehouse-openeo-driver-geospatial-be-staging.apps.fmaas-backend.fmaas.res.ibm.com/openeo/1.2/' with OidcBearerAuth>

    def _guess_runtime(self, connection: Optional[Connection] = None) -> str:
        """Guess UDF runtime from UDF source (path) or source code."""
        # First, guess UDF language
        language = None
        if isinstance(self._source, pathlib.Path):
            language = self._guess_runtime_from_suffix(self._source.suffix)
        elif isinstance(self._source, str):
            url_match = re.match(
                r"https?://.*?(?P<suffix>\.\w+)([&#].*)?$", self._source
            )
            if url_match:
                language = self._guess_runtime_from_suffix(url_match.group("suffix"))
        if not language:
            # Guess language from UDF code
            if re.search(r"^def [\w0-9_]+\(", self.code, flags=re.MULTILINE):
                language = "Python"
            # TODO: detection heuristics for R and other languages?
        if not language:
>           raise OpenEoClientException("Failed to detect language of UDF code.")
E           openeo.rest.OpenEoClientException: Failed to detect language of UDF code.

../../../../.pyenv/versions/3.9.16/envs/tensorlakehouse-ee/lib/python3.9/site-packages/openeo/rest/_datacube.py:243: OpenEoClientException